diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 818a322762c6..0be0b11b0d12 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8014,8 +8014,8 @@ multiclass avx512_cvtph2ps_sae, - avx512_cvtph2ps_sae, + WriteCvtPH2PSZ>, + avx512_cvtph2ps_sae, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { @@ -8068,8 +8068,8 @@ multiclass avx512_cvtps2ph_sae, - avx512_cvtps2ph_sae, + WriteCvtPS2PHZ, WriteCvtPS2PHZSt>, + avx512_cvtps2ph_sae, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { defm VCVTPS2PHZ256 : avx512_cvtps2ph; defm : BWWriteResPair; // Floating point add/sub. defm : BWWriteResPair; // Floating point add/sub (XMM). defm : BWWriteResPair; // Floating point add/sub (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point double add/sub. defm : BWWriteResPair; // Floating point double add/sub (XMM). defm : BWWriteResPair; // Floating point double add/sub (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point compare. defm : BWWriteResPair; // Floating point compare (XMM). defm : BWWriteResPair; // Floating point compare (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point double compare. defm : BWWriteResPair; // Floating point double compare (XMM). defm : BWWriteResPair; // Floating point double compare (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point compare to flags. defm : BWWriteResPair; // Floating point multiplication. defm : BWWriteResPair; // Floating point multiplication (XMM). defm : BWWriteResPair; // Floating point multiplication (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point double multiplication. defm : BWWriteResPair; // Floating point double multiplication (XMM). defm : BWWriteResPair; // Floating point double multiplication (YMM/ZMM). +defm : X86WriteResPairUnsupported; //defm : BWWriteResPair; // Floating point division. defm : BWWriteResPair; // Floating point division (XMM). @@ -233,34 +239,45 @@ defm : BWWriteResPair; // Float defm : BWWriteResPair; // Floating point reciprocal estimate. defm : BWWriteResPair; // Floating point reciprocal estimate (XMM). defm : BWWriteResPair; // Floating point reciprocal estimate (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point reciprocal square root estimate. defm : BWWriteResPair; // Floating point reciprocal square root estimate (XMM). defm : BWWriteResPair; // Floating point reciprocal square root estimate (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Fused Multiply Add. defm : BWWriteResPair; // Fused Multiply Add (XMM). defm : BWWriteResPair; // Fused Multiply Add (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point double dot product. defm : BWWriteResPair; // Floating point single dot product. defm : BWWriteResPair; // Floating point single dot product (YMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point fabs/fchs. defm : X86WriteRes; // Floating point rounding. defm : X86WriteRes; // Floating point rounding (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : X86WriteRes; defm : X86WriteRes; defm : BWWriteResPair; // Floating point and/or/xor logicals. defm : BWWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point TEST instructions. defm : BWWriteResPair; // Floating point TEST instructions (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point vector shuffles. defm : BWWriteResPair; // Floating point vector shuffles (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point vector variable shuffles. defm : BWWriteResPair; // Floating point vector variable shuffles. +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point vector blends. defm : BWWriteResPair; // Floating point vector blends. +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Fp vector variable blends. defm : BWWriteResPair; // Fp vector variable blends. +defm : X86WriteResPairUnsupported; // FMA Scheduling helper class. // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } @@ -291,31 +308,42 @@ defm : X86WriteRes; // Vector integer ALU op, no logicals. defm : BWWriteResPair; // Vector integer ALU op, no logicals. defm : BWWriteResPair; // Vector integer ALU op, no logicals (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Vector integer and/or/xor. defm : BWWriteResPair; // Vector integer and/or/xor. defm : BWWriteResPair; // Vector integer and/or/xor (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Vector integer TEST instructions. defm : BWWriteResPair; // Vector integer TEST instructions (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Vector integer multiply. defm : BWWriteResPair; // Vector integer multiply. defm : BWWriteResPair; // Vector integer multiply. +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Vector PMULLD. defm : BWWriteResPair; // Vector PMULLD (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Vector shuffles. defm : BWWriteResPair; // Vector shuffles. defm : BWWriteResPair; // Vector shuffles (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Vector variable shuffles. defm : BWWriteResPair; // Vector variable shuffles. defm : BWWriteResPair; // Vector variable shuffles (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Vector blends. defm : BWWriteResPair; // Vector blends (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Vector variable blends. defm : BWWriteResPair; // Vector variable blends (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Vector MPSAD. defm : BWWriteResPair; // Vector MPSAD. +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Vector PSADBW. defm : BWWriteResPair; // Vector PSADBW. defm : BWWriteResPair; // Vector PSADBW (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Vector PHMINPOS. // Vector integer shifts. @@ -323,12 +351,15 @@ defm : BWWriteResPair; defm : BWWriteResPair; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; defm : BWWriteResPair; // Vector integer immediate shifts (XMM). defm : BWWriteResPair; // Vector integer immediate shifts (YMM/ZMM). +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Variable vector shifts. defm : BWWriteResPair; // Variable vector shifts (YMM/ZMM). +defm : X86WriteResPairUnsupported; // Vector insert/extract operations. def : WriteRes { @@ -354,33 +385,43 @@ def : WriteRes { defm : BWWriteResPair; defm : BWWriteResPair; defm : BWWriteResPair; +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; defm : BWWriteResPair; defm : BWWriteResPair; +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; defm : BWWriteResPair; defm : BWWriteResPair; +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; defm : BWWriteResPair; defm : BWWriteResPair; +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; defm : BWWriteResPair; defm : BWWriteResPair; +defm : X86WriteResPairUnsupported; defm : BWWriteResPair; defm : BWWriteResPair; defm : BWWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; // Strings instructions. diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 0001ffb755f2..73273f0e9d1d 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -185,25 +185,31 @@ defm : X86WriteRes; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; @@ -217,10 +223,12 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; @@ -235,60 +243,80 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 // Conversion between integer and float. defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 // Vector integer operations. defm : X86WriteRes; @@ -314,46 +342,61 @@ defm : X86WriteRes; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; // Vector integer shifts. defm : HWWriteResPair; defm : HWWriteResPair; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 // Vector insert/extract operations. def : WriteRes { diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index a7795021b6a8..396706e5f468 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -176,25 +176,31 @@ defm : X86WriteRes; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; @@ -208,10 +214,12 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; @@ -226,58 +234,78 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 // Conversion between integer and float. defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : SBWriteResPair; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 // Vector integer operations. defm : X86WriteRes; @@ -303,42 +331,56 @@ defm : X86WriteRes; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; // TODO this is probably wrong for 256/512-bit for the "generic" model +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; // Vector integer shifts. defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 // Vector insert/extract operations. def : WriteRes { @@ -365,9 +407,11 @@ def : WriteRes { defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 //////////////////////////////////////////////////////////////////////////////// // String instructions. @@ -484,6 +528,7 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Unsupported = 1 // Remaining SNB instrs. diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 761b9b8c9757..952db0507997 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -183,76 +183,93 @@ defm : X86WriteRes; defm : X86WriteRes; defm : SKLWriteResPair; // Floating point add/sub. -defm : SKLWriteResPair; // Floating point add/sub (XMM). -defm : SKLWriteResPair; // Floating point add/sub (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point double add/sub. -defm : SKLWriteResPair; // Floating point double add/sub (XMM). -defm : SKLWriteResPair; // Floating point double add/sub (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point compare. -defm : SKLWriteResPair; // Floating point compare (XMM). -defm : SKLWriteResPair; // Floating point compare (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point double compare. -defm : SKLWriteResPair; // Floating point double compare (XMM). -defm : SKLWriteResPair; // Floating point double compare (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point compare to flags. defm : SKLWriteResPair; // Floating point multiplication. -defm : SKLWriteResPair; // Floating point multiplication (XMM). -defm : SKLWriteResPair; // Floating point multiplication (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point double multiplication. -defm : SKLWriteResPair; // Floating point double multiplication (XMM). -defm : SKLWriteResPair; // Floating point double multiplication (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point division. -//defm : SKLWriteResPair; // Floating point division (XMM). -defm : SKLWriteResPair; // Floating point division (YMM). +//defm : SKLWriteResPair; +defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; //defm : SKLWriteResPair; // Floating point double division. -//defm : SKLWriteResPair; // Floating point double division (XMM). -//defm : SKLWriteResPair; // Floating point double division (YMM). +//defm : SKLWriteResPair; +//defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point square root. -defm : SKLWriteResPair; // Floating point square root (XMM). -defm : SKLWriteResPair; // Floating point square root (YMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point double square root. -defm : SKLWriteResPair; // Floating point double square root (XMM). -defm : SKLWriteResPair; // Floating point double square root (YMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point long double square root. defm : SKLWriteResPair; // Floating point reciprocal estimate. -defm : SKLWriteResPair; // Floating point reciprocal estimate (XMM). -defm : SKLWriteResPair; // Floating point reciprocal estimate (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point reciprocal square root estimate. -defm : SKLWriteResPair; // Floating point reciprocal square root estimate (XMM). -defm : SKLWriteResPair; // Floating point reciprocal square root estimate (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Fused Multiply Add. -defm : SKLWriteResPair; // Fused Multiply Add (XMM). -defm : SKLWriteResPair; // Fused Multiply Add (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point double dot product. -defm : SKLWriteResPair; // Floating point single dot product. -defm : SKLWriteResPair; // Floating point single dot product (YMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point fabs/fchs. defm : SKLWriteResPair; // Floating point rounding. -defm : SKLWriteResPair; // Floating point rounding (YMM/ZMM). +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point and/or/xor logicals. -defm : SKLWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point TEST instructions. -defm : SKLWriteResPair; // Floating point TEST instructions (YMM/ZMM). +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point vector shuffles. -defm : SKLWriteResPair; // Floating point vector shuffles (YMM/ZMM). +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point vector shuffles. -defm : SKLWriteResPair; // Floating point vector shuffles. +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point vector blends. -defm : SKLWriteResPair; // Floating point vector blends. +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Fp vector variable blends. -defm : SKLWriteResPair; // Fp vector variable blends. +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; // FMA Scheduling helper class. // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } @@ -279,33 +296,44 @@ defm : X86WriteRes; defm : X86WriteRes; defm : SKLWriteResPair; // Vector integer ALU op, no logicals. -defm : SKLWriteResPair; // Vector integer ALU op, no logicals (XMM). -defm : SKLWriteResPair; // Vector integer ALU op, no logicals (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Vector integer and/or/xor. -defm : SKLWriteResPair; // Vector integer and/or/xor (XMM). -defm : SKLWriteResPair; // Vector integer and/or/xor (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Vector integer TEST instructions. -defm : SKLWriteResPair; // Vector integer TEST instructions (YMM/ZMM). +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Vector integer multiply. -defm : SKLWriteResPair; // Vector integer multiply (XMM). -defm : SKLWriteResPair; // Vector integer multiply (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Vector PMULLD. -defm : SKLWriteResPair; // Vector PMULLD (YMM/ZMM). +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Vector shuffles. -defm : SKLWriteResPair; // Vector shuffles (XMM). -defm : SKLWriteResPair; // Vector shuffles (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Vector shuffles. -defm : SKLWriteResPair; // Vector shuffles (XMM). -defm : SKLWriteResPair; // Vector shuffles (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Vector blends. -defm : SKLWriteResPair; // Vector blends (YMM/ZMM). +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Vector variable blends. -defm : SKLWriteResPair; // Vector variable blends (YMM/ZMM). +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Vector MPSAD. -defm : SKLWriteResPair; // Vector MPSAD (YMM/ZMM). +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Vector PSADBW. -defm : SKLWriteResPair; // Vector PSADBW (XMM). -defm : SKLWriteResPair; // Vector PSADBW (YMM/ZMM). +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Vector PHMINPOS. // Vector integer shifts. @@ -314,12 +342,15 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResPairUnsupported; -defm : SKLWriteResPair; -defm : SKLWriteResPair; // Vector integer immediate shifts (XMM). -defm : SKLWriteResPair; // Vector integer immediate shifts (YMM/ZMM). +defm : SKLWriteResPair; // Vector integer immediate shifts. +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Variable vector shifts. -defm : SKLWriteResPair; // Variable vector shifts (YMM/ZMM). +defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; // Vector insert/extract operations. def : WriteRes { @@ -346,33 +377,43 @@ def : WriteRes { defm : SKLWriteResPair; defm : SKLWriteResPair; defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; defm : SKLWriteResPair; defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; defm : SKLWriteResPair; defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; defm : SKLWriteResPair; defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; defm : SKLWriteResPair; defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; defm : SKLWriteResPair; defm : SKLWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; // Strings instructions. diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 8ee58e781092..9080d847a7d4 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -182,77 +182,94 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : SKXWriteResPair; // Floating point add/sub. -defm : SKXWriteResPair; // Floating point add/sub (XMM). -defm : SKXWriteResPair; // Floating point add/sub (YMM/ZMM). -defm : SKXWriteResPair; // Floating point double add/sub. -defm : SKXWriteResPair; // Floating point double add/sub (XMM). -defm : SKXWriteResPair; // Floating point double add/sub (YMM/ZMM). +defm : SKXWriteResPair; // Floating point add/sub. +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; // Floating point double add/sub. +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; -defm : SKXWriteResPair; // Floating point compare. -defm : SKXWriteResPair; // Floating point compare (XMM). -defm : SKXWriteResPair; // Floating point compare (YMM/ZMM). -defm : SKXWriteResPair; // Floating point double compare. -defm : SKXWriteResPair; // Floating point double compare (XMM). -defm : SKXWriteResPair; // Floating point double compare (YMM/ZMM). +defm : SKXWriteResPair; // Floating point compare. +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; // Floating point double compare. +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Floating point compare to flags. -defm : SKXWriteResPair; // Floating point multiplication. -defm : SKXWriteResPair; // Floating point multiplication (XMM). -defm : SKXWriteResPair; // Floating point multiplication (YMM/ZMM). -defm : SKXWriteResPair; // Floating point double multiplication. -defm : SKXWriteResPair; // Floating point double multiplication (XMM). -defm : SKXWriteResPair; // Floating point double multiplication (YMM/ZMM). +defm : SKXWriteResPair; // Floating point multiplication. +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; // Floating point double multiplication. +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // 10-14 cycles. // Floating point division. -//defm : SKXWriteResPair; // 10-14 cycles. // Floating point division (XMM). -defm : SKXWriteResPair; // 10-14 cycles. // Floating point division (YMM). -defm : SKXWriteResPair; // 10-14 cycles. // Floating point division (ZMM). +//defm : SKXWriteResPair; // 10-14 cycles. +defm : SKXWriteResPair; // 10-14 cycles. +defm : SKXWriteResPair; // 10-14 cycles. //defm : SKXWriteResPair; // 10-14 cycles. // Floating point division. -//defm : SKXWriteResPair; // 10-14 cycles. // Floating point division (XMM). -//defm : SKXWriteResPair; // 10-14 cycles. // Floating point division (YMM). -defm : SKXWriteResPair; // 10-14 cycles. // Floating point division (ZMM). +//defm : SKXWriteResPair; // 10-14 cycles. +//defm : SKXWriteResPair; // 10-14 cycles. +defm : SKXWriteResPair; // 10-14 cycles. defm : SKXWriteResPair; // Floating point square root. -defm : SKXWriteResPair; // Floating point square root (XMM). -defm : SKXWriteResPair; // Floating point square root (YMM). -defm : SKXWriteResPair; // Floating point square root (ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Floating point double square root. -defm : SKXWriteResPair; // Floating point double square root (XMM). -defm : SKXWriteResPair; // Floating point double square root (YMM). -defm : SKXWriteResPair; // Floating point double square root (ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Floating point long double square root. defm : SKXWriteResPair; // Floating point reciprocal estimate. -defm : SKXWriteResPair; // Floating point reciprocal estimate (XMM). -defm : SKXWriteResPair; // Floating point reciprocal estimate (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Floating point reciprocal square root estimate. -defm : SKXWriteResPair; // Floating point reciprocal square root estimate (XMM). -defm : SKXWriteResPair; // Floating point reciprocal square root estimate (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; -defm : SKXWriteResPair; // Fused Multiply Add. -defm : SKXWriteResPair; // Fused Multiply Add (XMM). -defm : SKXWriteResPair; // Fused Multiply Add (YMM/ZMM). +defm : SKXWriteResPair; // Fused Multiply Add. +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Floating point double dot product. -defm : SKXWriteResPair; // Floating point single dot product. -defm : SKXWriteResPair; // Floating point single dot product (YMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Floating point fabs/fchs. -defm : SKXWriteResPair; // Floating point rounding. -defm : SKXWriteResPair; // Floating point rounding (YMM/ZMM). +defm : SKXWriteResPair; // Floating point rounding. +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Floating point and/or/xor logicals. -defm : SKXWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Floating point TEST instructions. -defm : SKXWriteResPair; // Floating point TEST instructions (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Floating point vector shuffles. -defm : SKXWriteResPair; // Floating point vector shuffles (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Floating point vector variable shuffles. -defm : SKXWriteResPair; // Floating point vector variable shuffles. +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Floating point vector blends. -defm : SKXWriteResPair; // Floating point vector blends. +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Fp vector variable blends. -defm : SKXWriteResPair; // Fp vector variable blends. +defm : SKXWriteResPair; +defm : SKXWriteResPair; // FMA Scheduling helper class. // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } @@ -279,47 +296,62 @@ defm : X86WriteRes; defm : X86WriteRes; defm : SKXWriteResPair; // Vector integer ALU op, no logicals. -defm : SKXWriteResPair; // Vector integer ALU op, no logicals (XMM). -defm : SKXWriteResPair; // Vector integer ALU op, no logicals (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Vector integer and/or/xor. -defm : SKXWriteResPair; // Vector integer and/or/xor (XMM). -defm : SKXWriteResPair; // Vector integer and/or/xor (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Vector integer TEST instructions. -defm : SKXWriteResPair; // Vector integer TEST instructions (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Vector integer multiply. -defm : SKXWriteResPair; // Vector integer multiply (XMM). -defm : SKXWriteResPair; // Vector integer multiply (YMM/ZMM). -defm : SKXWriteResPair; // Vector PMULLD. -defm : SKXWriteResPair; // Vector PMULLD (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; // Vector PMULLD. +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Vector shuffles. -defm : SKXWriteResPair; // Vector shuffles (XMM). -defm : SKXWriteResPair; // Vector shuffles (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Vector variable shuffles. -defm : SKXWriteResPair; // Vector variable shuffles (XMM). -defm : SKXWriteResPair; // Vector variable shuffles (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Vector blends. -defm : SKXWriteResPair; // Vector blends (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Vector variable blends. -defm : SKXWriteResPair; // Vector variable blends (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Vector MPSAD. -defm : SKXWriteResPair; // Vector MPSAD. +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Vector PSADBW. -defm : SKXWriteResPair; // Vector PSADBW. -defm : SKXWriteResPair; // Vector PSADBW. -defm : SKXWriteResPair; // Vector PHMINPOS. +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; // Vector PHMINPOS. // Vector integer shifts. defm : SKXWriteResPair; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : SKXWriteResPair; -defm : SKXWriteResPair; // Vector integer immediate shifts (XMM). -defm : SKXWriteResPair; // Vector integer immediate shifts (YMM/ZMM). +defm : SKXWriteResPair; // Vector integer immediate shifts. +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Variable vector shifts. -defm : SKXWriteResPair; // Variable vector shifts (YMM/ZMM). +defm : SKXWriteResPair; +defm : SKXWriteResPair; // Vector insert/extract operations. def : WriteRes { @@ -343,36 +375,46 @@ def : WriteRes { } // Conversion between integer and float. -defm : SKXWriteResPair; -defm : SKXWriteResPair; -defm : SKXWriteResPair; -defm : SKXWriteResPair; -defm : SKXWriteResPair; -defm : SKXWriteResPair; +defm : SKXWriteResPair; // Needs more work: DD vs DQ. +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; -defm : SKXWriteResPair; -defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; // Needs more work: DD vs DQ. defm : SKXWriteResPair; -defm : SKXWriteResPair; -defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; -defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; -defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Strings instructions. @@ -589,15 +631,15 @@ def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z|Z128|Z256)rr", - "VBLENDMPS(Z|Z128|Z256)rr", +def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr", + "VBLENDMPS(Z128|Z256)rr", "VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr", "(V?)PADD(B|D|Q|W)rr", "VPBLENDD(Y?)rri", - "VPBLENDMB(Z|Z128|Z256)rr", - "VPBLENDMD(Z|Z128|Z256)rr", - "VPBLENDMQ(Z|Z128|Z256)rr", - "VPBLENDMW(Z|Z128|Z256)rr", + "VPBLENDMB(Z128|Z256)rr", + "VPBLENDMD(Z128|Z256)rr", + "VPBLENDMQ(Z128|Z256)rr", + "VPBLENDMW(Z128|Z256)rr", "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rr", "(V?)PSUB(B|D|Q|W)rr", "VPTERNLOGD(Z|Z128|Z256)rri", @@ -611,8 +653,7 @@ def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> { def: InstRW<[SKXWriteResGroup10], (instrs CBW, CWDE, CDQE, CMC, STC)>; def: InstRW<[SKXWriteResGroup10], (instrs LAHF, SAHF)>; // TODO: This doesn't match Agner's data -def: InstRW<[SKXWriteResGroup10], (instregex "NOOP", - "SGDT64m", +def: InstRW<[SKXWriteResGroup10], (instregex "SGDT64m", "SIDT64m", "SMSW16m", "STRm", @@ -739,8 +780,7 @@ def SKXWriteResGroup30 : SchedWriteRes<[SKXPort0]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup30], (instregex "KADD(B|D|Q|W)rr", - "KMOV(B|D|Q|W)rk", +def: InstRW<[SKXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk", "KORTEST(B|D|Q|W)rr", "KTEST(B|D|Q|W)rr")>; @@ -768,6 +808,7 @@ def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> { let ResourceCycles = [1]; } def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)", + "KADD(B|D|Q|W)rr", "KSHIFTL(B|D|Q|W)ri", "KSHIFTR(B|D|Q|W)ri", "KUNPCKBWrr", @@ -907,26 +948,44 @@ def SKXWriteResGroup49 : SchedWriteRes<[SKXPort0]> { } def: InstRW<[SKXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>; -def SKXWriteResGroup50 : SchedWriteRes<[SKXPort015]> { +def SKXWriteResGroup50 : SchedWriteRes<[SKXPort01]> { let Latency = 4; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PS(Y|Z|Z128|Z256)rr", +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PS(Y|Z128|Z256)rr", "(V?)CVTDQ2PSrr", - "VCVTPD2QQ(Z|Z128|Z256)rr", - "VCVTPD2UQQ(Z|Z128|Z256)rr", - "VCVTPS2DQ(Y|Z|Z128|Z256)rr", + "VCVTPD2QQ(Z128|Z256)rr", + "VCVTPD2UQQ(Z128|Z256)rr", + "VCVTPS2DQ(Y|Z128|Z256)rr", "(V?)CVTPS2DQrr", - "VCVTPS2UDQ(Z|Z128|Z256)rr", - "VCVTQQ2PD(Z|Z128|Z256)rr", - "VCVTTPD2QQ(Z|Z128|Z256)rr", - "VCVTTPD2UQQ(Z|Z128|Z256)rr", - "VCVTTPS2DQ(Y|Z|Z128|Z256)rr", + "VCVTPS2UDQ(Z128|Z256)rr", + "VCVTQQ2PD(Z128|Z256)rr", + "VCVTTPD2QQ(Z128|Z256)rr", + "VCVTTPD2UQQ(Z128|Z256)rr", + "VCVTTPS2DQ(Z128|Z256)rr", "(V?)CVTTPS2DQrr", - "VCVTTPS2UDQ(Z|Z128|Z256)rr", - "VCVTUDQ2PS(Z|Z128|Z256)rr", - "VCVTUQQ2PD(Z|Z128|Z256)rr")>; + "VCVTTPS2UDQ(Z128|Z256)rr", + "VCVTUDQ2PS(Z128|Z256)rr", + "VCVTUQQ2PD(Z128|Z256)rr")>; + +def SKXWriteResGroup50z : SchedWriteRes<[SKXPort05]> { + let Latency = 4; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup50z], (instrs VCVTDQ2PSZrr, + VCVTPD2QQZrr, + VCVTPD2UQQZrr, + VCVTPS2DQZrr, + VCVTPS2UDQZrr, + VCVTQQ2PDZrr, + VCVTTPD2QQZrr, + VCVTTPD2UQQZrr, + VCVTTPS2DQZrr, + VCVTTPS2UDQZrr, + VCVTUDQ2PSZrr, + VCVTUQQ2PDZrr)>; def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> { let Latency = 4; @@ -1010,13 +1069,6 @@ def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm16", "MOVZX(16|32|64)rm8", "(V?)MOVDDUPrm")>; // TODO: Should this be SKXWriteResGroup71? -def SKXWriteResGroup59 : SchedWriteRes<[SKXPort015]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SKXWriteResGroup59], (instregex "VCVTSD2SSZrr")>; - def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> { let Latency = 5; let NumMicroOps = 2; @@ -1035,7 +1087,7 @@ def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIirr", "VCVTPS2QQZ128rr", "VCVTPS2UQQZ128rr", "VCVTQQ2PSZ128rr", - "(V?)CVTSD2SSrr", + "(V?)CVTSD2SS(Z?)rr", "(V?)CVTSI(64)?2SDrr", "VCVTSI2SSZrr", "(V?)CVTSI2SSrr", @@ -1136,7 +1188,7 @@ def: InstRW<[SKXWriteResGroup71], (instregex "VBROADCASTSSrm", "VPBROADCASTDrm", "VPBROADCASTQrm")>; -def SKXWriteResGroup72 : SchedWriteRes<[SKXPort0]> { +def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> { let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [2]; @@ -1286,7 +1338,7 @@ def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m", "VPBROADCASTDYrm", "VPBROADCASTQYrm")>; -def SKXWriteResGroup90 : SchedWriteRes<[SKXPort0,SKXPort5]> { +def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01,SKXPort5]> { let Latency = 7; let NumMicroOps = 2; let ResourceCycles = [1,1]; @@ -1318,21 +1370,40 @@ def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PD(Z|Z256)rr", - "VCVTPD2DQ(Y|Z|Z256)rr", - "VCVTPD2PS(Y|Z|Z256)rr", - "VCVTPD2UDQ(Z|Z256)rr", - "VCVTPS2PD(Y|Z|Z256)rr", - "VCVTPS2QQ(Z|Z256)rr", - "VCVTPS2UQQ(Z|Z256)rr", - "VCVTQQ2PS(Z|Z256)rr", - "VCVTTPD2DQ(Y|Z|Z256)rr", - "VCVTTPD2UDQ(Z|Z256)rr", - "VCVTTPS2QQ(Z|Z256)rr", - "VCVTTPS2UQQ(Z|Z256)rr", - "VCVTUDQ2PD(Z|Z256)rr", - "VCVTUQQ2PS(Z|Z256)rr")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr", + "VCVTPD2DQ(Y|Z256)rr", + "VCVTPD2PS(Y|Z256)rr", + "VCVTPD2UDQZ256rr", + "VCVTPS2PD(Y|Z256)rr", + "VCVTPS2QQZ256rr", + "VCVTPS2UQQZ256rr", + "VCVTQQ2PSZ256rr", + "VCVTTPD2DQ(Y|Z256)rr", + "VCVTTPD2UDQZ256rr", + "VCVTTPS2QQZ256rr", + "VCVTTPS2UQQZ256rr", + "VCVTUDQ2PDZ256rr", + "VCVTUQQ2PSZ256rr")>; +def SKXWriteResGroup93z : SchedWriteRes<[SKXPort5,SKXPort05]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup93z], (instrs VCVTDQ2PDZrr, + VCVTPD2DQZrr, + VCVTPD2PSZrr, + VCVTPD2UDQZrr, + VCVTPS2PDZrr, + VCVTPS2QQZrr, + VCVTPS2UQQZrr, + VCVTQQ2PSZrr, + VCVTTPD2DQZrr, + VCVTTPD2UDQZrr, + VCVTTPS2QQZrr, + VCVTTPS2UQQZrr, + VCVTUDQ2PDZrr, + VCVTUQQ2PSZrr)>; def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 7; @@ -1405,8 +1476,8 @@ def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort015]> { let ResourceCycles = [1,1,1]; } def: InstRW<[SKXWriteResGroup100], (instregex "VCVTSS2USI64Zrr", - "VCVTTSS2SI(64)?Zrr", - "(V?)CVTTSS2SI(64)?rr", + "(V?)CVTSS2SI64(Z?)rr", + "(V?)CVTTSS2SI64(Z?)rr", "VCVTTSS2USI64Zrr")>; def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> { @@ -1751,16 +1822,6 @@ def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> { def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm", "(V?)CVTPS2PDrm")>; -def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0,SKXPort015]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SKXWriteResGroup138], (instregex "VRCP14PDZr(b?)", - "VRCP14PSZr(b?)", - "VRSQRT14PDZr(b?)", - "VRSQRT14PSZr(b?)")>; - def SKXWriteResGroup142 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort23]> { let Latency = 9; let NumMicroOps = 3; @@ -2009,12 +2070,19 @@ def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> { } def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>; -def SKXWriteResGroup174 : SchedWriteRes<[SKXPort015]> { +def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> { let Latency = 12; let NumMicroOps = 3; let ResourceCycles = [3]; } -def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z|Z128|Z256)rr")>; +def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>; + +def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> { + let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SKXWriteResGroup174z], (instregex "VPMULLQZrr")>; def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> { let Latency = 12; @@ -2143,16 +2211,6 @@ def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06 } def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>; -def SKXWriteResGroup198 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { - let Latency = 16; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SKXWriteResGroup198], (instregex "VRCP14PDZm(b?)", - "VRCP14PSZm(b?)", - "VRSQRT14PDZm(b?)", - "VRSQRT14PSZm(b?)")>; - def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { let Latency = 16; let NumMicroOps = 14; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 8aad2ad7009c..be3ff86e0b4c 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -62,7 +62,6 @@ multiclass X86WriteResPairUnsupported { } } - // Multiclass that wraps X86FoldableSchedWrite for each vector width. class X86SchedWriteWidths Integer. defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM). -defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM/ZMM). +defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM). +defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM). defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer. defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM). -defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM/ZMM). +defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM). +defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM). defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double. defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM). -defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM/ZMM). +defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM). +defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM). defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float. defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM). -defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM/ZMM). +defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM). +defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM). defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion. defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM). -defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM/ZMM). +defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM). +defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM). defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion. defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM). -defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM/ZMM). +defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM). +defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM). defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion. -defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM/ZMM). +defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM). +defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM). def WriteCvtPS2PH : SchedWrite; // // Float -> Half size conversion. -def WriteCvtPS2PHY : SchedWrite; // // Float -> Half size conversion (YMM/ZMM). +def WriteCvtPS2PHY : SchedWrite; // // Float -> Half size conversion (YMM). +def WriteCvtPS2PHZ : SchedWrite; // // Float -> Half size conversion (ZMM). def WriteCvtPS2PHSt : SchedWrite; // // Float -> Half + store size conversion. -def WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion (YMM/ZMM). +def WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion (YMM). +def WriteCvtPS2PHZSt : SchedWrite; // // Float -> Half + store size conversion (ZMM). // CRC32 instruction. defm WriteCRC32 : X86SchedWritePair; @@ -387,7 +427,8 @@ defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuff defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles. defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles. defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts. -defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM/ZMM). +defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM). +defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM). // Old microcoded instructions that nobody use. def WriteMicrocoded : SchedWrite; @@ -441,25 +482,25 @@ def SchedWriteVecMoveLSNT // Vector width wrappers. def SchedWriteFAdd - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFAdd64 - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFHAdd - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFCmp - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFCmp64 - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFMul - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFMul64 - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFMA - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteDPPD : X86SchedWriteWidths; def SchedWriteDPPS - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFDiv : X86SchedWriteWidths; def SchedWriteFDiv64 @@ -471,90 +512,90 @@ def SchedWriteFSqrt64 : X86SchedWriteWidths; def SchedWriteFRcp - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFRsqrt - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFRnd - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFLogic - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFTest - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFShuffle : X86SchedWriteWidths; + WriteFShuffleY, WriteFShuffleZ>; def SchedWriteFVarShuffle : X86SchedWriteWidths; + WriteFVarShuffleY, WriteFVarShuffleZ>; def SchedWriteFBlend - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFVarBlend : X86SchedWriteWidths; + WriteFVarBlendY, WriteFVarBlendZ>; def SchedWriteCvtDQ2PD : X86SchedWriteWidths; + WriteCvtI2PDY, WriteCvtI2PDZ>; def SchedWriteCvtDQ2PS : X86SchedWriteWidths; + WriteCvtI2PSY, WriteCvtI2PSZ>; def SchedWriteCvtPD2DQ : X86SchedWriteWidths; + WriteCvtPD2IY, WriteCvtPD2IZ>; def SchedWriteCvtPS2DQ : X86SchedWriteWidths; + WriteCvtPS2IY, WriteCvtPS2IZ>; def SchedWriteCvtPS2PD : X86SchedWriteWidths; + WriteCvtPS2PDY, WriteCvtPS2PDZ>; def SchedWriteCvtPD2PS : X86SchedWriteWidths; + WriteCvtPD2PSY, WriteCvtPD2PSZ>; def SchedWriteVecALU - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWritePHAdd - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteVecLogic : X86SchedWriteWidths; + WriteVecLogicY, WriteVecLogicZ>; def SchedWriteVecTest : X86SchedWriteWidths; + WriteVecTestY, WriteVecTestZ>; def SchedWriteVecShift : X86SchedWriteWidths; + WriteVecShiftY, WriteVecShiftZ>; def SchedWriteVecShiftImm : X86SchedWriteWidths; + WriteVecShiftImmY, WriteVecShiftImmZ>; def SchedWriteVarVecShift : X86SchedWriteWidths; + WriteVarVecShiftY, WriteVarVecShiftZ>; def SchedWriteVecIMul : X86SchedWriteWidths; + WriteVecIMulY, WriteVecIMulZ>; def SchedWritePMULLD : X86SchedWriteWidths; + WritePMULLDY, WritePMULLDZ>; def SchedWriteMPSAD : X86SchedWriteWidths; + WriteMPSADY, WriteMPSADZ>; def SchedWritePSADBW : X86SchedWriteWidths; + WritePSADBWY, WritePSADBWZ>; def SchedWriteShuffle : X86SchedWriteWidths; + WriteShuffleY, WriteShuffleZ>; def SchedWriteVarShuffle : X86SchedWriteWidths; + WriteVarShuffleY, WriteVarShuffleZ>; def SchedWriteBlend - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteVarBlend : X86SchedWriteWidths; + WriteVarBlendY, WriteVarBlendZ>; // Vector size wrappers. def SchedWriteFAddSizes diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index f2a4156b0bcd..83ee19b73634 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -210,28 +210,36 @@ defm : X86WriteRes; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; @@ -252,24 +260,33 @@ defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; @@ -280,30 +297,39 @@ defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResUnsupported; defm : X86WriteResUnsupported; defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; //////////////////////////////////////////////////////////////////////////////// // Vector integer operations. @@ -334,42 +360,56 @@ defm : X86WriteRes; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; //////////////////////////////////////////////////////////////////////////////// // Vector insert/extract operations. diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index e7597b4fcfb4..fd41b2f7ed2e 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -298,34 +298,44 @@ defm : X86WriteRes; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; @@ -346,18 +356,25 @@ defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : X86WriteResPairUnsupported; @@ -368,33 +385,42 @@ defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; // FIXME: f+3 ST, LD+STC latency defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; //////////////////////////////////////////////////////////////////////////////// // Vector integer operations. @@ -425,42 +451,56 @@ defm : X86WriteRes; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; -defm : JWriteResYMMPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; //////////////////////////////////////////////////////////////////////////////// // Vector insert/extract operations. @@ -555,7 +595,7 @@ def JWriteZeroLatency : SchedWriteRes<[]> { let Latency = 0; } -// Certain instructions that use the same register for both source +// Certain instructions that use the same register for both source // operands do not have a real dependency on the previous contents of the // register, and thus, do not have to wait before completing. They can be // optimized out at register renaming stage. diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index b8386972de04..1ac3123cfcc4 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -159,22 +159,28 @@ defm : X86WriteRes; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; @@ -186,9 +192,11 @@ defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; @@ -201,40 +209,52 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; // Conversion between integer and float. defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; // Vector integer operations. def : WriteRes { let Latency = 3; } @@ -260,37 +280,49 @@ def : WriteRes; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; // FIXME: The below is closer to correct, but caused some perf regressions. //defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; // Vector insert/extract operations. @@ -309,9 +341,11 @@ def : WriteRes { defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; // String instructions. // Packed Compare Implicit Length Strings, Return Mask @@ -407,25 +441,33 @@ def : WriteRes; // scheduling resources anyway. def : WriteRes; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : X86WriteResUnsupported; defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; defm : X86WriteResUnsupported; defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; } // SchedModel diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index df496e230ecf..6e6fe146e78b 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -212,34 +212,45 @@ defm : X86WriteRes; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; //defm : ZnWriteResFpuPair; @@ -251,29 +262,39 @@ defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; // FIXME: Should folds require 1 extra uops? defm : ZnWriteResFpuPair; // FIXME: Should folds require 1 extra uops? +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; //defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; //defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -309,40 +330,52 @@ defm : X86WriteRes; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; // FIXME defm : ZnWriteResFpuPair; // FIXME +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; // Vector Shift Operations defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; // Vector insert/extract operations. defm : ZnWriteResFpuPair; @@ -1144,9 +1177,10 @@ def ZnWriteCVTPD2PSYr: SchedWriteRes<[ZnFPU3]> { // CVTPD2PS. // x,x. def : SchedAlias; - // y,y. def : SchedAlias; +// z,z. +defm : X86WriteResUnsupported; def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> { let Latency = 11; @@ -1161,6 +1195,8 @@ def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { let Latency = 11; } def : SchedAlias; +// z,m512 +defm : X86WriteResUnsupported; // CVTSD2SS. // x,x. @@ -1185,12 +1221,14 @@ def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { } def : SchedAlias; def : SchedAlias; +defm : X86WriteResUnsupported; // y,x. def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> { let Latency = 3; } def : SchedAlias; +defm : X86WriteResUnsupported; // CVTSS2SD. // x,x. @@ -1288,17 +1326,21 @@ def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>; // x,v,i. def : SchedAlias; def : SchedAlias; +defm : X86WriteResUnsupported; // m,v,i. def : SchedAlias; def : SchedAlias; +defm : X86WriteResUnsupported; // VCVTPH2PS. // v,x. def : SchedAlias; def : SchedAlias; +defm : X86WriteResUnsupported; // v,m. def : SchedAlias; def : SchedAlias; +defm : X86WriteResUnsupported; //-- SSE4A instructions --// // EXTRQ diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index 6c137846dd95..7ec84256a9fc 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -42,7 +42,7 @@ define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; SKX-LABEL: test_addpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -96,7 +96,7 @@ define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; SKX-LABEL: test_addps: ; SKX: # %bb.0: -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -150,7 +150,7 @@ define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; ; SKX-LABEL: test_addsubpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -205,7 +205,7 @@ define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; ; SKX-LABEL: test_addsubps: ; SKX: # %bb.0: -; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -267,7 +267,7 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SKX: # %bb.0: ; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andnotpd: @@ -336,7 +336,7 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SKX: # %bb.0: ; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andnotps: @@ -405,7 +405,7 @@ define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKX: # %bb.0: ; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andpd: @@ -472,7 +472,7 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKX: # %bb.0: ; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andps: @@ -538,7 +538,7 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x doubl ; SKX-LABEL: test_blendpd: ; SKX: # %bb.0: ; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -602,7 +602,7 @@ define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> * ; SKX: # %bb.0: ; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] ; SKX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blendps: @@ -956,7 +956,7 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; SKX-LABEL: test_cmppd: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50] ; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1022,7 +1022,7 @@ define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; SKX-LABEL: test_cmpps: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50] ; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1090,7 +1090,7 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] ; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtdq2pd: @@ -1153,9 +1153,9 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { ; ; SKX-LABEL: test_cvtdq2ps: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtdq2ps: @@ -1217,7 +1217,7 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { ; SKX-LABEL: test_cvtpd2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:0.50] ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -1281,7 +1281,7 @@ define <8 x i32> @test_cvttpd2dq(<4 x double> %a0, <4 x double> *%a1) { ; SKX-LABEL: test_cvttpd2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:0.50] ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -1406,7 +1406,7 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { ; ; SKX-LABEL: test_cvtps2dq: ; SKX: # %bb.0: -; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50] ; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1470,7 +1470,7 @@ define <8 x i32> @test_cvttps2dq(<8 x float> %a0, <8 x float> *%a1) { ; ; SKX-LABEL: test_cvttps2dq: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:0.50] ; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50] ; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1979,7 +1979,7 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float ; SKX: # %bb.0: ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] ; SKX-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_insertf128: @@ -2334,7 +2334,7 @@ define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; SKX-LABEL: test_maxpd: ; SKX: # %bb.0: -; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2389,7 +2389,7 @@ define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; SKX-LABEL: test_maxps: ; SKX: # %bb.0: -; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2444,7 +2444,7 @@ define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; SKX-LABEL: test_minpd: ; SKX: # %bb.0: -; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2499,7 +2499,7 @@ define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; SKX-LABEL: test_minps: ; SKX: # %bb.0: -; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2560,7 +2560,7 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { ; SKX-LABEL: test_movapd: ; SKX: # %bb.0: ; SKX-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2622,7 +2622,7 @@ define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { ; SKX-LABEL: test_movaps: ; SKX: # %bb.0: ; SKX-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2685,7 +2685,7 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] ; SKX-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movddup: @@ -2912,7 +2912,7 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) { ; ; SKX-LABEL: test_movntpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2965,7 +2965,7 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { ; ; SKX-LABEL: test_movntps: ; SKX: # %bb.0: -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3025,7 +3025,7 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] ; SKX-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movshdup: @@ -3088,7 +3088,7 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] ; SKX-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movsldup: @@ -3152,7 +3152,7 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { ; SKX-LABEL: test_movupd: ; SKX: # %bb.0: ; SKX-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3216,7 +3216,7 @@ define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { ; SKX-LABEL: test_movups: ; SKX: # %bb.0: ; SKX-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3272,7 +3272,7 @@ define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; SKX-LABEL: test_mulpd: ; SKX: # %bb.0: -; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3326,7 +3326,7 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; SKX-LABEL: test_mulps: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3387,7 +3387,7 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) ; SKX: # %bb.0: ; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: orpd: @@ -3454,7 +3454,7 @@ define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2 ; SKX: # %bb.0: ; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_orps: @@ -3521,7 +3521,7 @@ define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x dou ; SKX: # %bb.0: ; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_perm2f128: @@ -3584,7 +3584,7 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] ; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilpd: @@ -3647,7 +3647,7 @@ define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] ; SKX-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilpd_ymm: @@ -3710,7 +3710,7 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] ; SKX-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilps: @@ -3773,7 +3773,7 @@ define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] ; SKX-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilps_ymm: @@ -4056,7 +4056,7 @@ define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] ; SKX-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_rcpps: @@ -4118,9 +4118,9 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { ; ; SKX-LABEL: test_roundpd: ; SKX: # %bb.0: -; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:0.67] -; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:0.67] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_roundpd: @@ -4182,9 +4182,9 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) { ; ; SKX-LABEL: test_roundps: ; SKX: # %bb.0: -; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:0.67] -; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:0.67] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:1.00] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_roundps: @@ -4248,7 +4248,7 @@ define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00] ; SKX-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_rsqrtps: @@ -4312,7 +4312,7 @@ define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; SKX: # %bb.0: ; SKX-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] ; SKX-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_shufpd: @@ -4375,7 +4375,7 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; SKX: # %bb.0: ; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] ; SKX-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_shufps: @@ -4438,7 +4438,7 @@ define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:12.00] ; SKX-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:12.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_sqrtpd: @@ -4502,7 +4502,7 @@ define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:6.00] ; SKX-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:6.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_sqrtps: @@ -4559,7 +4559,7 @@ define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; SKX-LABEL: test_subpd: ; SKX: # %bb.0: -; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4613,7 +4613,7 @@ define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; SKX-LABEL: test_subps: ; SKX: # %bb.0: -; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5008,7 +5008,7 @@ define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SKX: # %bb.0: ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_unpckhpd: @@ -5125,7 +5125,7 @@ define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SKX: # %bb.0: ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_unpcklpd: @@ -5242,7 +5242,7 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKX: # %bb.0: ; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_xorpd: @@ -5309,7 +5309,7 @@ define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKX: # %bb.0: ; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_xorps: diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll index 4bfbf1ec2089..1bfe60e31042 100644 --- a/llvm/test/CodeGen/X86/avx2-schedule.ll +++ b/llvm/test/CodeGen/X86/avx2-schedule.ll @@ -76,7 +76,7 @@ define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) { ; SKX-LABEL: test_broadcastsd_ymm: ; SKX: # %bb.0: ; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_broadcastsd_ymm: @@ -117,7 +117,7 @@ define <4 x float> @test_broadcastss(<4 x float> %a0) { ; SKX-LABEL: test_broadcastss: ; SKX: # %bb.0: ; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_broadcastss: @@ -158,7 +158,7 @@ define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) { ; SKX-LABEL: test_broadcastss_ymm: ; SKX: # %bb.0: ; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_broadcastss_ymm: @@ -2634,7 +2634,7 @@ define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] ; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_permpd: @@ -2683,7 +2683,7 @@ define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2 ; SKX: # %bb.0: ; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] ; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_permps: @@ -3320,7 +3320,7 @@ define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) ; ; SKX-LABEL: test_pmaddubsw: ; SKX: # %bb.0: -; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3364,7 +3364,7 @@ define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; ; SKX-LABEL: test_pmaddwd: ; SKX: # %bb.0: -; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4761,7 +4761,7 @@ define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; ; SKX-LABEL: test_pmuldq: ; SKX: # %bb.0: -; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4805,7 +4805,7 @@ define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2 ; ; SKX-LABEL: test_pmulhrsw: ; SKX: # %bb.0: -; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4848,7 +4848,7 @@ define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; ; SKX-LABEL: test_pmulhuw: ; SKX: # %bb.0: -; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4891,7 +4891,7 @@ define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; ; SKX-LABEL: test_pmulhw: ; SKX: # %bb.0: -; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4934,8 +4934,8 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; ; SKX-LABEL: test_pmulld: ; SKX: # %bb.0: -; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:0.67] -; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:0.67] +; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmulld: @@ -4976,7 +4976,7 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; ; SKX-LABEL: test_pmullw: ; SKX: # %bb.0: -; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5018,7 +5018,7 @@ define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; ; SKX-LABEL: test_pmuludq: ; SKX: # %bb.0: -; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index aeabf2f7cead..28af00ae3b6f 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -12,7 +12,7 @@ define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) { ; ; SKX-LABEL: addpd512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %add.i = fadd <8 x double> %x, %y @@ -42,7 +42,7 @@ define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) { ; ; SKX-LABEL: addps512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %add.i = fadd <16 x float> %x, %y @@ -72,7 +72,7 @@ define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) { ; ; SKX-LABEL: subpd512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %sub.i = fsub <8 x double> %x, %y @@ -103,7 +103,7 @@ define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) { ; ; SKX-LABEL: subps512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %sub.i = fsub <16 x float> %x, %y @@ -134,7 +134,7 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { ; ; SKX-LABEL: imulq512: ; SKX: # %bb.0: -; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.00] +; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.50] ; SKX-NEXT: retq # sched: [7:1.00] %z = mul <8 x i64>%x, %y ret <8 x i64>%z @@ -148,7 +148,7 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) { ; ; SKX-LABEL: imulq256: ; SKX: # %bb.0: -; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.00] +; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.50] ; SKX-NEXT: retq # sched: [7:1.00] %z = mul <4 x i64>%x, %y ret <4 x i64>%z @@ -162,7 +162,7 @@ define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) { ; ; SKX-LABEL: imulq128: ; SKX: # %bb.0: -; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.50] ; SKX-NEXT: retq # sched: [7:1.00] %z = mul <2 x i64>%x, %y ret <2 x i64>%z @@ -176,7 +176,7 @@ define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) { ; ; SKX-LABEL: mulpd512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %mul.i = fmul <8 x double> %x, %y @@ -206,7 +206,7 @@ define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) { ; ; SKX-LABEL: mulps512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %mul.i = fmul <16 x float> %x, %y @@ -543,7 +543,7 @@ define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) { ; ; SKX-LABEL: vpmulld_test: ; SKX: # %bb.0: -; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:0.67] +; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %x = mul <16 x i32> %i, %j ret <16 x i32> %x @@ -712,7 +712,7 @@ define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, ; SKX-LABEL: test_mask_vaddps: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] <16 x float> %j, <16 x i32> %mask1) nounwind readnone { @@ -732,7 +732,7 @@ define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x ; SKX-LABEL: test_mask_vmulps: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = fmul <16 x float> %i, %j @@ -750,7 +750,7 @@ define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x ; SKX-LABEL: test_mask_vminps: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %cmp_res = fcmp olt <16 x float> %i, %j @@ -769,7 +769,7 @@ define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x d ; SKX-LABEL: test_mask_vminpd: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %cmp_res = fcmp olt <8 x double> %i, %j @@ -788,7 +788,7 @@ define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x ; SKX-LABEL: test_mask_vmaxps: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %cmp_res = fcmp ogt <16 x float> %i, %j @@ -807,7 +807,7 @@ define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x d ; SKX-LABEL: test_mask_vmaxpd: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %cmp_res = fcmp ogt <8 x double> %i, %j @@ -826,7 +826,7 @@ define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x ; SKX-LABEL: test_mask_vsubps: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = fsub <16 x float> %i, %j @@ -862,7 +862,7 @@ define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x d ; SKX-LABEL: test_mask_vaddpd: ; SKX: # %bb.0: ; SKX-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %x = fadd <8 x double> %i, %j @@ -880,7 +880,7 @@ define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i6 ; SKX-LABEL: test_maskz_vaddpd: ; SKX: # %bb.0: ; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.33] +; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %x = fadd <8 x double> %i, %j @@ -1071,10 +1071,10 @@ define double @test1(double %a, double %b) nounwind { ; SKX-NEXT: jne .LBB64_1 # sched: [1:0.50] ; SKX-NEXT: jnp .LBB64_2 # sched: [1:0.50] ; SKX-NEXT: .LBB64_1: # %l1 -; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; SKX-NEXT: .LBB64_2: # %l2 -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %tobool = fcmp une double %a, %b br i1 %tobool, label %l1, label %l2 @@ -1104,10 +1104,10 @@ define float @test2(float %a, float %b) nounwind { ; SKX-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00] ; SKX-NEXT: jbe .LBB65_2 # sched: [1:0.50] ; SKX-NEXT: # %bb.1: # %l1 -; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; SKX-NEXT: .LBB65_2: # %l2 -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %tobool = fcmp olt float %a, %b br i1 %tobool, label %l1, label %l2 @@ -1338,7 +1338,7 @@ define <16 x float> @sitof32(<16 x i32> %a) nounwind { ; ; SKX-LABEL: sitof32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i32> %a to <16 x float> ret <16 x float> %b @@ -1352,7 +1352,7 @@ define <8 x double> @sltof864(<8 x i64> %a) { ; ; SKX-LABEL: sltof864: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <8 x i64> %a to <8 x double> ret <8 x double> %b @@ -1366,7 +1366,7 @@ define <4 x double> @slto4f64(<4 x i64> %a) { ; ; SKX-LABEL: slto4f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <4 x i64> %a to <4 x double> ret <4 x double> %b @@ -1380,7 +1380,7 @@ define <2 x double> @slto2f64(<2 x i64> %a) { ; ; SKX-LABEL: slto2f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <2 x i64> %a to <2 x double> ret <2 x double> %b @@ -1423,7 +1423,7 @@ define <4 x i64> @f64to4sl(<4 x double> %a) { ; ; SKX-LABEL: f64to4sl: ; SKX: # %bb.0: -; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptosi <4 x double> %a to <4 x i64> ret <4 x i64> %b @@ -1483,7 +1483,7 @@ define <8 x double> @ulto8f64(<8 x i64> %a) { ; ; SKX-LABEL: ulto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <8 x i64> %a to <8 x double> ret <8 x double> %b @@ -1498,8 +1498,8 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; ; SKX-LABEL: ulto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.33] +; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i64> %a to <16 x double> ret <16 x double> %b @@ -1513,7 +1513,7 @@ define <16 x i32> @f64to16si(<16 x float> %a) nounwind { ; ; SKX-LABEL: f64to16si: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptosi <16 x float> %a to <16 x i32> ret <16 x i32> %b @@ -1527,7 +1527,7 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind { ; ; SKX-LABEL: f32to16ui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptoui <16 x float> %a to <16 x i32> ret <16 x i32> %b @@ -1543,7 +1543,7 @@ define <16 x i8> @f32to16uc(<16 x float> %f) { ; ; SKX-LABEL: f32to16uc: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1560,7 +1560,7 @@ define <16 x i16> @f32to16us(<16 x float> %f) { ; ; SKX-LABEL: f32to16us: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00] ; SKX-NEXT: retq # sched: [7:1.00] %res = fptoui <16 x float> %f to <16 x i16> @@ -1575,7 +1575,7 @@ define <8 x i32> @f32to8ui(<8 x float> %a) nounwind { ; ; SKX-LABEL: f32to8ui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptoui <8 x float> %a to <8 x i32> ret <8 x i32> %b @@ -1589,7 +1589,7 @@ define <4 x i32> @f32to4ui(<4 x float> %a) nounwind { ; ; SKX-LABEL: f32to4ui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptoui <4 x float> %a to <4 x i32> ret <4 x i32> %b @@ -1684,7 +1684,7 @@ define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwi ; SKX-LABEL: i32to8f64_mask: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [7:1.00] +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; VLNOBW-LABEL: i32to8f64_mask: ; VLNOBW: # %bb.0: @@ -1706,7 +1706,7 @@ define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { ; SKX-LABEL: sito8f64_maskz: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [7:1.00] +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; VLNOBW-LABEL: sito8f64_maskz: ; VLNOBW: # %bb.0: @@ -2094,7 +2094,7 @@ define <8 x double> @slto8f64(<8 x i64> %a) { ; ; SKX-LABEL: slto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <8 x i64> %a to <8 x double> ret <8 x double> %b @@ -2109,8 +2109,8 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; ; SKX-LABEL: slto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.33] +; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i64> %a to <16 x double> ret <16 x double> %b @@ -2158,7 +2158,7 @@ define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwin ; SKX-LABEL: uito8f64_mask: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [7:1.00] +; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; VLNOBW-LABEL: uito8f64_mask: ; VLNOBW: # %bb.0: @@ -2180,7 +2180,7 @@ define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { ; SKX-LABEL: uito8f64_maskz: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [7:1.00] +; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %1 = bitcast i8 %b to <8 x i1> %2 = uitofp <8 x i32> %a to <8 x double> @@ -2210,7 +2210,7 @@ define <16 x float> @uito16f32(<16 x i32> %a) nounwind { ; ; SKX-LABEL: uito16f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i32> %a to <16 x float> ret <16 x float> %b @@ -2238,7 +2238,7 @@ define <8 x float> @uito8f32(<8 x i32> %a) nounwind { ; ; SKX-LABEL: uito8f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <8 x i32> %a to <8 x float> ret <8 x float> %b @@ -2252,7 +2252,7 @@ define <4 x float> @uito4f32(<4 x i32> %a) nounwind { ; ; SKX-LABEL: uito4f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <4 x i32> %a to <4 x float> ret <4 x float> %b @@ -2266,7 +2266,7 @@ define i32 @fptosi(float %a) nounwind { ; ; SKX-LABEL: fptosi: ; SKX: # %bb.0: -; SKX-NEXT: vcvttss2si %xmm0, %eax # sched: [7:1.00] +; SKX-NEXT: vcvttss2si %xmm0, %eax # sched: [6:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptosi float %a to i32 ret i32 %b @@ -2326,7 +2326,7 @@ define <16 x float> @sbto16f32(<16 x i32> %a) { ; SKX: # %bb.0: ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp slt <16 x i32> %a, zeroinitializer %1 = sitofp <16 x i1> %mask to <16 x float> @@ -2343,7 +2343,7 @@ define <16 x float> @scto16f32(<16 x i8> %a) { ; SKX-LABEL: scto16f32: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %1 = sitofp <16 x i8> %a to <16 x float> ret <16 x float> %1 @@ -2359,7 +2359,7 @@ define <16 x float> @ssto16f32(<16 x i16> %a) { ; SKX-LABEL: ssto16f32: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %1 = sitofp <16 x i16> %a to <16 x float> ret <16 x float> %1 @@ -2482,8 +2482,8 @@ define <8 x float> @sbto8f32(<8 x float> %a) { ; SKX-LABEL: sbto8f32: ; SKX: # %bb.0: ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] -; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %cmpres = fcmp ogt <8 x float> %a, zeroinitializer %1 = sitofp <8 x i1> %cmpres to <8 x float> @@ -2501,8 +2501,8 @@ define <4 x float> @sbto4f32(<4 x float> %a) { ; SKX-LABEL: sbto4f32: ; SKX: # %bb.0: ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %cmpres = fcmp ogt <4 x float> %a, zeroinitializer %1 = sitofp <4 x i1> %cmpres to <4 x float> @@ -2541,8 +2541,8 @@ define <2 x float> @sbto2f32(<2 x float> %a) { ; SKX-LABEL: sbto2f32: ; SKX: # %bb.0: ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %cmpres = fcmp ogt <2 x float> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x float> @@ -2561,9 +2561,9 @@ define <2 x double> @sbto2f64(<2 x double> %a) { ; SKX-LABEL: sbto2f64: ; SKX: # %bb.0: ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %cmpres = fcmp ogt <2 x double> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x double> @@ -2580,7 +2580,7 @@ define <16 x float> @ucto16f32(<16 x i8> %a) { ; SKX-LABEL: ucto16f32: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i8> %a to <16 x float> ret <16 x float>%b @@ -2614,7 +2614,7 @@ define <16 x float> @swto16f32(<16 x i16> %a) { ; SKX-LABEL: swto16f32: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i16> %a to <16 x float> ret <16 x float> %b @@ -2686,7 +2686,7 @@ define <16 x float> @uwto16f32(<16 x i16> %a) { ; SKX-LABEL: uwto16f32: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i16> %a to <16 x float> ret <16 x float> %b @@ -2736,7 +2736,7 @@ define <16 x float> @sito16f32(<16 x i32> %a) { ; ; SKX-LABEL: sito16f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i32> %a to <16 x float> ret <16 x float> %b @@ -2772,7 +2772,7 @@ define <16 x float> @usto16f32(<16 x i16> %a) { ; SKX-LABEL: usto16f32: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i16> %a to <16 x float> ret <16 x float> %b @@ -2791,8 +2791,8 @@ define <16 x float> @ubto16f32(<16 x i32> %a) { ; SKX: # %bb.0: ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp slt <16 x i32> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x float> @@ -2814,7 +2814,7 @@ define <16 x double> @ubto16f64(<16 x i32> %a) { ; SKX: # %bb.0: ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:0.50] +; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00] ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] @@ -2945,7 +2945,7 @@ define <2 x double> @ubto2f64(<2 x i32> %a) { ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <2 x i32> %a, zeroinitializer %1 = uitofp <2 x i1> %mask to <2 x double> @@ -4253,7 +4253,7 @@ define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = bitcast i16 %b to <16 x i1> %c = zext <16 x i1> %a to <16 x i32> @@ -4272,7 +4272,7 @@ define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = bitcast i8 %b to <8 x i1> %c = zext <8 x i1> %a to <8 x i64> @@ -4312,7 +4312,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { ; ; SKX-LABEL: trunc_16i32_to_16i1: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:0.50] +; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; SKX-NEXT: # kill: def $ax killed $ax killed $eax @@ -4502,7 +4502,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; ; SKX-LABEL: test21: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:0.50] +; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00] ; SKX-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:1.00] ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] ; SKX-NEXT: kshiftrq $32, %k1, %k1 # sched: [3:1.00] @@ -4666,7 +4666,7 @@ define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { ; SKX: # %bb.0: ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp eq <32 x i16> %x, %y %1 = zext <32 x i1> %mask to <32 x i16> @@ -4763,8 +4763,8 @@ define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 ; ; SKX-LABEL: test_x86_fmadd_ps_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul <16 x float> %a0, %a1 %res = fadd <16 x float> %x, %a2 @@ -4780,8 +4780,8 @@ define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 ; ; SKX-LABEL: test_x86_fmsub_ps_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul <16 x float> %a0, %a1 %res = fsub <16 x float> %x, %a2 @@ -4797,8 +4797,8 @@ define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <1 ; ; SKX-LABEL: test_x86_fnmadd_ps_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul <16 x float> %a0, %a1 %res = fsub <16 x float> %a2, %x @@ -4815,9 +4815,9 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <1 ; ; SKX-LABEL: test_x86_fnmsub_ps_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul <16 x float> %a0, %a1 %y = fsub <16 x float> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 ; ; SKX-LABEL: test_x86_fmadd_pd_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul <8 x double> %a0, %a1 %res = fadd <8 x double> %x, %a2 @@ -4854,8 +4854,8 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 ; ; SKX-LABEL: test_x86_fmsub_pd_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul <8 x double> %a0, %a1 %res = fsub <8 x double> %x, %a2 @@ -4871,8 +4871,8 @@ define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) { ; ; SKX-LABEL: test_x86_fmsub_213: ; SKX: # %bb.0: -; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul double %a0, %a1 %res = fsub double %x, %a2 @@ -4888,7 +4888,7 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) { ; ; SKX-LABEL: test_x86_fmsub_213_m: ; SKX: # %bb.0: -; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a2 = load double , double *%a2_ptr @@ -4907,7 +4907,7 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) { ; SKX-LABEL: test_x86_fmsub_231_m: ; SKX: # %bb.0: ; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a2 = load double , double *%a2_ptr %x = fmul double %a0, %a2 @@ -4925,7 +4925,7 @@ define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; SKX-LABEL: test231_br: ; SKX: # %bb.0: ; SKX-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b1 = fmul <16 x float> %a1, %b2 = fadd <16 x float> %b1, %a2 @@ -4941,7 +4941,7 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; ; SKX-LABEL: test213_br: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b1 = fmul <16 x float> %a1, %a2 @@ -4964,7 +4964,7 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <1 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] ; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [11:0.50] -; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 %x = fmul <16 x float> %a0, %a2 @@ -4989,7 +4989,7 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <1 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] ; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.33] +; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.50] ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 @@ -5014,7 +5014,7 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <1 ; SKX: # %bb.0: ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] -; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [11:0.50] ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -5035,7 +5035,7 @@ define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnon ; SKX-LABEL: vpandd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5055,7 +5055,7 @@ define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readno ; SKX-LABEL: vpandnd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5077,7 +5077,7 @@ define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ; SKX-LABEL: vpord: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5097,7 +5097,7 @@ define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnon ; SKX-LABEL: vpxord: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5117,7 +5117,7 @@ define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone s ; SKX-LABEL: vpandq: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5136,7 +5136,7 @@ define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ; SKX-LABEL: vpandnq: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5156,7 +5156,7 @@ define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ss ; SKX-LABEL: vporq: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5175,7 +5175,7 @@ define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone s ; SKX-LABEL: vpxorq: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5192,7 +5192,7 @@ define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) { ; ; SKX-LABEL: and_v64i8: ; SKX: # %bb.0: -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %res = and <64 x i8> %a, %b ret <64 x i8> %res @@ -5206,7 +5206,7 @@ define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) { ; ; SKX-LABEL: andn_v64i8: ; SKX: # %bb.0: -; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b2 = xor <64 x i8> %b, @or_v64i8(<64 x i8> %a, <64 x i8> %b) { ; ; SKX-LABEL: or_v64i8: ; SKX: # %bb.0: -; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %res = or <64 x i8> %a, %b ret <64 x i8> %res @@ -5238,7 +5238,7 @@ define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) { ; ; SKX-LABEL: xor_v64i8: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %res = xor <64 x i8> %a, %b ret <64 x i8> %res @@ -5252,7 +5252,7 @@ define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) { ; ; SKX-LABEL: and_v32i16: ; SKX: # %bb.0: -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %res = and <32 x i16> %a, %b ret <32 x i16> %res @@ -5266,7 +5266,7 @@ define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) { ; ; SKX-LABEL: andn_v32i16: ; SKX: # %bb.0: -; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b2 = xor <32 x i16> %b, @@ -5282,7 +5282,7 @@ define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) { ; ; SKX-LABEL: or_v32i16: ; SKX: # %bb.0: -; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %res = or <32 x i16> %a, %b ret <32 x i16> %res @@ -5296,7 +5296,7 @@ define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) { ; ; SKX-LABEL: xor_v32i16: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %res = xor <32 x i16> %a, %b ret <32 x i16> %res @@ -5313,8 +5313,8 @@ define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x f ; SKX-LABEL: masked_and_v16f32: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] -; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] +; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a1 = bitcast <16 x float> %a to <16 x i32> %b1 = bitcast <16 x float> %b to <16 x i32> @@ -5338,8 +5338,8 @@ define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x fl ; SKX-LABEL: masked_or_v16f32: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] -; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] +; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a1 = bitcast <16 x float> %a to <16 x i32> %b1 = bitcast <16 x float> %b to <16 x i32> @@ -5363,8 +5363,8 @@ define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x f ; SKX-LABEL: masked_xor_v16f32: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] -; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] +; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a1 = bitcast <16 x float> %a to <16 x i32> %b1 = bitcast <16 x float> %b to <16 x i32> @@ -5388,8 +5388,8 @@ define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x dou ; SKX-LABEL: masked_and_v8f64: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] -; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] +; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a1 = bitcast <8 x double> %a to <8 x i64> %b1 = bitcast <8 x double> %b to <8 x i64> @@ -5413,8 +5413,8 @@ define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x doub ; SKX-LABEL: masked_or_v8f64: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] -; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] +; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a1 = bitcast <8 x double> %a to <8 x i64> %b1 = bitcast <8 x double> %b to <8 x i64> @@ -5438,8 +5438,8 @@ define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x dou ; SKX-LABEL: masked_xor_v8f64: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] -; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] +; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a1 = bitcast <8 x double> %a to <8 x i64> %b1 = bitcast <8 x double> %b to <8 x i64> @@ -5462,7 +5462,7 @@ define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, ; SKX-LABEL: test_mm512_mask_and_epi32: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %and1.i.i = and <8 x i64> %__a, %__b @@ -5484,7 +5484,7 @@ define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, < ; SKX-LABEL: test_mm512_mask_or_epi32: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %or1.i.i = or <8 x i64> %__a, %__b @@ -5506,7 +5506,7 @@ define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, ; SKX-LABEL: test_mm512_mask_xor_epi32: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %xor1.i.i = xor <8 x i64> %__a, %__b @@ -5528,7 +5528,7 @@ define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, ; SKX-LABEL: test_mm512_mask_xor_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5550,7 +5550,7 @@ define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, ; SKX-LABEL: test_mm512_maskz_xor_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5572,7 +5572,7 @@ define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, ; SKX-LABEL: test_mm512_mask_xor_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5594,7 +5594,7 @@ define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A ; SKX-LABEL: test_mm512_maskz_xor_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5616,7 +5616,7 @@ define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, < ; SKX-LABEL: test_mm512_mask_or_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5638,7 +5638,7 @@ define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, ; SKX-LABEL: test_mm512_maskz_or_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5660,7 +5660,7 @@ define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, ; SKX-LABEL: test_mm512_mask_or_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5682,7 +5682,7 @@ define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, ; SKX-LABEL: test_mm512_maskz_or_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5704,7 +5704,7 @@ define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, ; SKX-LABEL: test_mm512_mask_and_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5726,7 +5726,7 @@ define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, ; SKX-LABEL: test_mm512_maskz_and_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5748,7 +5748,7 @@ define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, ; SKX-LABEL: test_mm512_mask_and_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5770,7 +5770,7 @@ define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A ; SKX-LABEL: test_mm512_maskz_and_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5792,7 +5792,7 @@ define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__ ; SKX-LABEL: test_mm512_mask_andnot_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5815,7 +5815,7 @@ define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %_ ; SKX-LABEL: test_mm512_maskz_andnot_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5838,7 +5838,7 @@ define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %_ ; SKX-LABEL: test_mm512_mask_andnot_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5861,7 +5861,7 @@ define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> % ; SKX-LABEL: test_mm512_maskz_andnot_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -7927,7 +7927,7 @@ define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { ; ; SKX-LABEL: store_32i1_1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:0.50] +; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] @@ -7950,7 +7950,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { ; ; SKX-LABEL: store_64i1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:0.50] +; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] @@ -8709,7 +8709,7 @@ define <16 x float> @broadcast_ss_spill(float %x) { ; SKX: # %bb.0: ; SKX-NEXT: subq $24, %rsp # sched: [1:0.25] ; SKX-NEXT: .cfi_def_cfa_offset 32 -; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] ; SKX-NEXT: callq func_f32 ; SKX-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50] @@ -8741,7 +8741,7 @@ define <8 x double> @broadcast_sd_spill(double %x) { ; SKX: # %bb.0: ; SKX-NEXT: subq $24, %rsp # sched: [1:0.25] ; SKX-NEXT: .cfi_def_cfa_offset 32 -; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] ; SKX-NEXT: callq func_f64 ; SKX-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50] diff --git a/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll b/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll index dc75d05825cc..53df4bfaba14 100644 --- a/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll @@ -25,15 +25,15 @@ define void @test_vpopcntd(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> *%a2, i16 ; ICELAKE: # %bb.0: ; ICELAKE-NEXT: kmovd %esi, %k1 # sched: [1:1.00] ; ICELAKE-NEXT: #APP -; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:0.50] -; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50] +; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:1.00] +; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:1.00] +; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] +; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:1.00] ; ICELAKE-NEXT: #NO_APP ; ICELAKE-NEXT: vzeroupper # sched: [4:1.00] ; ICELAKE-NEXT: retq # sched: [7:1.00] @@ -63,15 +63,15 @@ define void @test_vpopcntq(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> *%a2, i8 %a3) ; ICELAKE: # %bb.0: ; ICELAKE-NEXT: kmovd %esi, %k1 # sched: [1:1.00] ; ICELAKE-NEXT: #APP -; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:0.50] -; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50] +; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:1.00] +; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:1.00] +; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] +; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:1.00] ; ICELAKE-NEXT: #NO_APP ; ICELAKE-NEXT: vzeroupper # sched: [4:1.00] ; ICELAKE-NEXT: retq # sched: [7:1.00] diff --git a/llvm/test/CodeGen/X86/fma-schedule.ll b/llvm/test/CodeGen/X86/fma-schedule.ll index f69c62a86807..819b9c7f27d5 100644 --- a/llvm/test/CodeGen/X86/fma-schedule.ll +++ b/llvm/test/CodeGen/X86/fma-schedule.ll @@ -75,9 +75,9 @@ define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfmaddpd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] ; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] ; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] @@ -167,9 +167,9 @@ define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKX-LABEL: test_vfmaddpd_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.33] -; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.33] +; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50] +; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50] ; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] ; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] ; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] @@ -257,9 +257,9 @@ define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; SKX-LABEL: test_vfmaddps_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] ; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] ; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] @@ -349,9 +349,9 @@ define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2 ; SKX-LABEL: test_vfmaddps_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.33] -; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50] +; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50] ; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] ; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] ; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] @@ -439,9 +439,9 @@ define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfmaddsd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] ; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] ; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] @@ -527,9 +527,9 @@ define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; SKX-LABEL: test_vfmaddss_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] ; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] ; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] @@ -619,9 +619,9 @@ define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKX-LABEL: test_vfmaddsubpd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.33] -; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.33] -; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.33] +; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50] +; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50] +; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50] ; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] ; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] ; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] @@ -711,9 +711,9 @@ define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x doubl ; SKX-LABEL: test_vfmaddsubpd_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.33] -; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.33] -; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.33] +; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50] +; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50] +; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50] ; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] ; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] ; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] @@ -801,9 +801,9 @@ define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKX-LABEL: test_vfmaddsubps_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.33] -; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.33] -; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.33] +; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50] +; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50] +; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50] ; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] ; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] ; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] @@ -893,9 +893,9 @@ define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SKX-LABEL: test_vfmaddsubps_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.33] -; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.33] -; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.33] +; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50] +; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50] +; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50] ; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] ; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] ; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] @@ -987,9 +987,9 @@ define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKX-LABEL: test_vfmsubaddpd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.33] -; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.33] -; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.33] +; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50] +; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50] +; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50] ; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] ; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] ; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] @@ -1079,9 +1079,9 @@ define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x doubl ; SKX-LABEL: test_vfmsubaddpd_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.33] -; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.33] -; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.33] +; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50] +; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50] +; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50] ; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] ; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] ; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] @@ -1169,9 +1169,9 @@ define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKX-LABEL: test_vfmsubaddps_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.33] -; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.33] -; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.33] +; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50] +; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50] +; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50] ; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] ; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] ; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] @@ -1261,9 +1261,9 @@ define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SKX-LABEL: test_vfmsubaddps_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.33] -; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.33] -; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.33] +; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50] +; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50] +; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50] ; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] ; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] ; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] @@ -1355,9 +1355,9 @@ define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfmsubpd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] ; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] ; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] @@ -1447,9 +1447,9 @@ define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKX-LABEL: test_vfmsubpd_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.33] -; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.33] -; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.33] +; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50] +; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50] +; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50] ; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] ; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] ; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] @@ -1537,9 +1537,9 @@ define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; SKX-LABEL: test_vfmsubps_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] ; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] ; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] @@ -1629,9 +1629,9 @@ define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2 ; SKX-LABEL: test_vfmsubps_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.33] -; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.33] -; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.33] +; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50] +; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50] +; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50] ; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] ; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] ; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] @@ -1719,9 +1719,9 @@ define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfmsubsd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] ; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] ; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] @@ -1807,9 +1807,9 @@ define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; SKX-LABEL: test_vfmsubss_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] ; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] ; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] @@ -1899,9 +1899,9 @@ define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfnmaddpd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] ; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] ; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] @@ -1991,9 +1991,9 @@ define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKX-LABEL: test_vfnmaddpd_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.33] +; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50] ; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] ; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] ; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] @@ -2081,9 +2081,9 @@ define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; SKX-LABEL: test_vfnmaddps_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] ; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] ; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] @@ -2173,9 +2173,9 @@ define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a ; SKX-LABEL: test_vfnmaddps_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.33] +; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50] ; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] ; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] ; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] @@ -2263,9 +2263,9 @@ define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfnmaddsd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] ; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] ; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] @@ -2351,9 +2351,9 @@ define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; SKX-LABEL: test_vfnmaddss_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] ; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] ; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] @@ -2443,9 +2443,9 @@ define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfnmsubpd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] ; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] ; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] @@ -2535,9 +2535,9 @@ define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKX-LABEL: test_vfnmsubpd_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.33] -; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.33] -; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.33] +; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50] +; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50] +; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50] ; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] ; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] ; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] @@ -2625,9 +2625,9 @@ define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; SKX-LABEL: test_vfnmsubps_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] ; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] ; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] @@ -2717,9 +2717,9 @@ define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a ; SKX-LABEL: test_vfnmsubps_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.33] -; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.33] -; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.33] +; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50] +; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50] +; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50] ; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] ; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] ; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] @@ -2807,9 +2807,9 @@ define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfnmsubsd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] ; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] ; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] @@ -2895,9 +2895,9 @@ define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; SKX-LABEL: test_vfnmsubss_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] ; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] ; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll index 9a60934cba5f..ac87a55f2a70 100644 --- a/llvm/test/CodeGen/X86/mmx-schedule.ll +++ b/llvm/test/CodeGen/X86/mmx-schedule.ll @@ -152,9 +152,9 @@ define <2 x double> @test_cvtpi2pd(x86_mmx %a0, x86_mmx* %a1) optsize { ; ; SKX-LABEL: test_cvtpi2pd: ; SKX: # %bb.0: -; SKX-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [9:0.50] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtpi2pd: @@ -232,7 +232,7 @@ define <4 x float> @test_cvtpi2ps(x86_mmx %a0, x86_mmx* %a1, <4 x float> %a2, <4 ; SKX: # %bb.0: ; SKX-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00] ; SKX-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtpi2ps: diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll index 29254f5813fa..06447ab9d63c 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath.ll @@ -153,7 +153,7 @@ define float @f32_one_step(float %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 1.0, %x ret float %div @@ -271,10 +271,10 @@ define float @f32_two_step(float %x) #2 { ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 1.0, %x ret float %div @@ -418,7 +418,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div @@ -536,10 +536,10 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div @@ -693,7 +693,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div @@ -824,10 +824,10 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div @@ -1031,9 +1031,9 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 { ; ; SKX-LABEL: v16f32_one_step: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> , %x ret <16 x float> %div @@ -1235,13 +1235,13 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 { ; ; SKX-LABEL: v16f32_two_step: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] ; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [8:0.50] ; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> , %x ret <16 x float> %div diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll index aaaf0c6ab7e5..7cc09dc96f2d 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath2.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll @@ -154,7 +154,7 @@ define float @f32_one_step_2(float %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] ; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 3456.0, %x @@ -254,9 +254,9 @@ define float @f32_one_step_2_divs(float %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] ; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] -; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 3456.0, %x %div2 = fdiv fast float %div, %x @@ -383,10 +383,10 @@ define float @f32_two_step_2(float %x) #2 { ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50] ; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 6789.0, %x @@ -480,7 +480,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> , %x @@ -582,9 +582,9 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50] -; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> , %x %div2 = fdiv fast <4 x float> %div, %x @@ -711,10 +711,10 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> , %x @@ -816,7 +816,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x @@ -927,9 +927,9 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [11:0.50] -; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x %div2 = fdiv fast <8 x float> %div, %x @@ -1070,10 +1070,10 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 { ; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x @@ -1331,9 +1331,9 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 { ; ; SKX-LABEL: v16f32_one_step2: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> , %x @@ -1498,11 +1498,11 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 { ; ; SKX-LABEL: v16f32_one_step_2_divs: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [11:0.50] -; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> , %x %div2 = fdiv fast <16 x float> %div, %x @@ -1721,13 +1721,13 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 { ; ; SKX-LABEL: v16f32_two_step2: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] ; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [8:0.50] ; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> , %x @@ -1786,7 +1786,7 @@ define <16 x float> @v16f32_no_step(<16 x float> %x) #3 { ; ; SKX-LABEL: v16f32_no_step: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> , %x ret <16 x float> %div @@ -1861,7 +1861,7 @@ define <16 x float> @v16f32_no_step2(<16 x float> %x) #3 { ; ; SKX-LABEL: v16f32_no_step2: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00] ; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> , %x diff --git a/llvm/test/CodeGen/X86/schedule-x86_64.ll b/llvm/test/CodeGen/X86/schedule-x86_64.ll index 84b2f91424e7..3eb4cb15478e 100644 --- a/llvm/test/CodeGen/X86/schedule-x86_64.ll +++ b/llvm/test/CodeGen/X86/schedule-x86_64.ll @@ -8376,13 +8376,13 @@ define void @test_nop(i16 %a0, i32 %a1, i64 %a2, i16 *%p0, i32 *%p1, i64 *%p2) o ; SKX-LABEL: test_nop: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: nop # sched: [1:0.25] -; SKX-NEXT: nopw %di # sched: [1:0.25] -; SKX-NEXT: nopw (%rcx) # sched: [1:0.25] -; SKX-NEXT: nopl %esi # sched: [1:0.25] -; SKX-NEXT: nopl (%r8) # sched: [1:0.25] -; SKX-NEXT: nopq %rdx # sched: [1:0.25] -; SKX-NEXT: nopq (%r9) # sched: [1:0.25] +; SKX-NEXT: nop # sched: [1:0.17] +; SKX-NEXT: nopw %di # sched: [1:0.17] +; SKX-NEXT: nopw (%rcx) # sched: [1:0.17] +; SKX-NEXT: nopl %esi # sched: [1:0.17] +; SKX-NEXT: nopl (%r8) # sched: [1:0.17] +; SKX-NEXT: nopq %rdx # sched: [1:0.17] +; SKX-NEXT: nopq (%r9) # sched: [1:0.17] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/sha-schedule.ll b/llvm/test/CodeGen/X86/sha-schedule.ll index dedf6f98b8ac..7069bd3a1192 100644 --- a/llvm/test/CodeGen/X86/sha-schedule.ll +++ b/llvm/test/CodeGen/X86/sha-schedule.ll @@ -23,7 +23,7 @@ define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; CANNONLAKE-LABEL: test_sha1msg1: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:0.50] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; @@ -54,7 +54,7 @@ define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; CANNONLAKE-LABEL: test_sha1msg2: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:0.50] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; @@ -85,7 +85,7 @@ define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; CANNONLAKE-LABEL: test_sha1nexte: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:0.50] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; @@ -116,7 +116,7 @@ define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; CANNONLAKE-LABEL: test_sha1rnds4: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:0.50] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; @@ -151,7 +151,7 @@ define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) ; ; CANNONLAKE-LABEL: test_sha256msg1: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:0.50] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; @@ -182,7 +182,7 @@ define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) ; ; CANNONLAKE-LABEL: test_sha256msg2: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:0.50] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; @@ -221,7 +221,7 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, ; CANNONLAKE: # %bb.0: ; CANNONLAKE-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.33] ; CANNONLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:0.50] ; CANNONLAKE-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.33] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll index bd1e44f65612..f18d0934198b 100644 --- a/llvm/test/CodeGen/X86/sse-schedule.ll +++ b/llvm/test/CodeGen/X86/sse-schedule.ll @@ -90,13 +90,13 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_addps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addps: ; SKX: # %bb.0: -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -198,13 +198,13 @@ define float @test_addss(float %a0, float %a1, float *%a2) { ; ; SKX-SSE-LABEL: test_addss: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addss: ; SKX: # %bb.0: -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -551,14 +551,14 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_cmpps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmpps: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50] ; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -669,13 +669,13 @@ define float @test_cmpss(float %a0, float %a1, float *%a2) { ; ; SKX-SSE-LABEL: test_cmpss: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmpss: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -1041,14 +1041,14 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsi2ss: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsi2ss: @@ -1167,14 +1167,14 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00] ; SKX-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsi2ssq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00] ; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsi2ssq: @@ -1420,14 +1420,14 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; ; SKX-SSE-LABEL: test_cvtss2siq: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [6:1.00] +; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [7:1.00] ; SKX-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00] ; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtss2siq: ; SKX: # %bb.0: -; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00] +; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [7:1.00] ; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00] ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1549,14 +1549,14 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; ; SKX-SSE-LABEL: test_cvttss2si: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00] +; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [6:1.00] ; SKX-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00] ; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvttss2si: ; SKX: # %bb.0: -; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00] +; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [6:1.00] ; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00] ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] @@ -2116,13 +2116,13 @@ define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_maxps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maxps: ; SKX: # %bb.0: -; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2225,13 +2225,13 @@ define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_maxss: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maxss: ; SKX: # %bb.0: -; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2334,13 +2334,13 @@ define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_minps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minps: ; SKX: # %bb.0: -; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2443,13 +2443,13 @@ define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_minss: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minss: ; SKX: # %bb.0: -; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2564,14 +2564,14 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; SKX-SSE-LABEL: test_movaps: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movaps: ; SKX: # %bb.0: ; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2795,7 +2795,7 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SKX-SSE-LABEL: test_movhps: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] ; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] @@ -2803,7 +2803,7 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SKX-LABEL: test_movhps: ; SKX: # %bb.0: ; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2918,13 +2918,13 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { ; SKX-SSE-LABEL: test_movlhps: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movlhps: ; SKX: # %bb.0: ; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_movlhps: @@ -3036,14 +3036,14 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SKX-SSE-LABEL: test_movlps: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movlps: ; SKX: # %bb.0: ; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3352,14 +3352,14 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; SKX-SSE-LABEL: test_movss_mem: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movss_mem: ; SKX: # %bb.0: ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3572,14 +3572,14 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; SKX-SSE-LABEL: test_movups: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movups: ; SKX: # %bb.0: ; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3685,13 +3685,13 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_mulps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulps: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3793,13 +3793,13 @@ define float @test_mulss(float %a0, float %a1, float *%a2) { ; ; SKX-SSE-LABEL: test_mulss: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulss: ; SKX: # %bb.0: -; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4206,14 +4206,14 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rcpps: ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00] ; SKX-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_rcpps: @@ -4347,7 +4347,7 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; SKX-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00] ; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rcpss: @@ -4355,7 +4355,7 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] ; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_rcpss: @@ -4483,14 +4483,14 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rsqrtps: ; SKX: # %bb.0: ; SKX-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00] ; SKX-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_rsqrtps: @@ -4624,7 +4624,7 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; SKX-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00] ; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rsqrtss: @@ -4632,7 +4632,7 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] ; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_rsqrtss: @@ -4854,14 +4854,14 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] ; SKX-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_shufps: ; SKX: # %bb.0: ; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] ; SKX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_shufps: @@ -4981,14 +4981,14 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:3.00] ; SKX-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:3.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtps: ; SKX: # %bb.0: ; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:3.00] ; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:3.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_sqrtps: @@ -5122,7 +5122,7 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:3.00] ; SKX-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] ; SKX-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:3.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtss: @@ -5130,7 +5130,7 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00] ; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] ; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:3.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_sqrtss: @@ -5351,13 +5351,13 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_subps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subps: ; SKX: # %bb.0: -; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5459,13 +5459,13 @@ define float @test_subss(float %a0, float %a1, float *%a2) { ; ; SKX-SSE-LABEL: test_subss: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subss: ; SKX: # %bb.0: -; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5826,14 +5826,14 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpckhps: ; SKX: # %bb.0: ; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; SKX-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_unpckhps: @@ -5952,14 +5952,14 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpcklps: ; SKX: # %bb.0: ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; SKX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_unpcklps: @@ -6210,7 +6210,7 @@ define <4 x float> @test_fnop() nounwind { ; SKX-SSE-LABEL: test_fnop: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: #APP -; SKX-SSE-NEXT: nop # sched: [1:0.25] +; SKX-SSE-NEXT: nop # sched: [1:0.17] ; SKX-SSE-NEXT: #NO_APP ; SKX-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] @@ -6218,7 +6218,7 @@ define <4 x float> @test_fnop() nounwind { ; SKX-LABEL: test_fnop: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: nop # sched: [1:0.25] +; SKX-NEXT: nop # sched: [1:0.17] ; SKX-NEXT: #NO_APP ; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index 55a5fe6c8edd..0af49688c522 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -88,13 +88,13 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_addpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -196,13 +196,13 @@ define double @test_addsd(double %a0, double %a1, double *%a2) { ; ; SKX-SSE-LABEL: test_addsd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addsd: ; SKX: # %bb.0: -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -317,14 +317,14 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andpd: ; SKX: # %bb.0: ; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_andpd: @@ -447,14 +447,14 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotpd: ; SKX: # %bb.0: ; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_andnotpd: @@ -673,14 +673,14 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_cmppd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmppd: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50] ; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -790,13 +790,13 @@ define double @test_cmpsd(double %a0, double %a1, double *%a2) { ; ; SKX-SSE-LABEL: test_cmpsd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmpsd: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -1162,16 +1162,16 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; ; SKX-SSE-LABEL: test_cvtdq2pd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtdq2pd: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtdq2pd: @@ -1291,16 +1291,16 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; ; SKX-SSE-LABEL: test_cvtdq2ps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtdq2ps: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtdq2ps: @@ -1427,7 +1427,7 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKX-LABEL: test_cvtpd2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:0.50] ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -1550,14 +1550,14 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtpd2ps: ; SKX: # %bb.0: ; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtpd2ps: @@ -1676,14 +1676,14 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; ; SKX-SSE-LABEL: test_cvtps2dq: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtps2dq: ; SKX: # %bb.0: -; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50] ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1806,14 +1806,14 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtps2pd: ; SKX: # %bb.0: ; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtps2pd: @@ -2205,7 +2205,7 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] ; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00] -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsd2ss: @@ -2213,7 +2213,7 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; SKX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsd2ss: @@ -2336,14 +2336,14 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsi2sd: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsi2sd: @@ -2462,14 +2462,14 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsi2sdq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsi2sdq: @@ -2603,7 +2603,7 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00] -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtss2sd: @@ -2611,7 +2611,7 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtss2sd: @@ -2742,7 +2742,7 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKX-LABEL: test_cvttpd2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:0.50] ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2863,14 +2863,14 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; ; SKX-SSE-LABEL: test_cvttps2dq: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvttps2dq: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50] ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -3732,13 +3732,13 @@ define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_maxpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maxpd: ; SKX: # %bb.0: -; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3841,13 +3841,13 @@ define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_maxsd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maxsd: ; SKX: # %bb.0: -; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3950,13 +3950,13 @@ define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_minpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minpd: ; SKX: # %bb.0: -; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4059,13 +4059,13 @@ define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_minsd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minsd: ; SKX: # %bb.0: -; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4180,14 +4180,14 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; SKX-SSE-LABEL: test_movapd: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movapd: ; SKX: # %bb.0: ; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4917,14 +4917,14 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKX-SSE-LABEL: test_movhpd: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movhpd: ; SKX: # %bb.0: ; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5045,14 +5045,14 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKX-SSE-LABEL: test_movlpd: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movlpd: ; SKX: # %bb.0: ; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5362,13 +5362,13 @@ define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { ; ; SKX-SSE-LABEL: test_movntpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movntpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5717,14 +5717,14 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; SKX-SSE-LABEL: test_movsd_mem: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movsd_mem: ; SKX: # %bb.0: ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5945,14 +5945,14 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; SKX-SSE-LABEL: test_movupd: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movupd: ; SKX: # %bb.0: ; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -6058,13 +6058,13 @@ define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_mulpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulpd: ; SKX: # %bb.0: -; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -6166,13 +6166,13 @@ define double @test_mulsd(double %a0, double %a1, double *%a2) { ; ; SKX-SSE-LABEL: test_mulsd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulsd: ; SKX: # %bb.0: -; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -6287,14 +6287,14 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_orpd: ; SKX: # %bb.0: ; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_orpd: @@ -9176,13 +9176,13 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SKX-SSE-LABEL: test_pmaddwd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaddwd: ; SKX: # %bb.0: -; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -9830,13 +9830,13 @@ define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SKX-SSE-LABEL: test_pmulhuw: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhuw: ; SKX: # %bb.0: -; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -9939,13 +9939,13 @@ define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SKX-SSE-LABEL: test_pmulhw: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhw: ; SKX: # %bb.0: -; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -10048,13 +10048,13 @@ define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SKX-SSE-LABEL: test_pmullw: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmullw: ; SKX: # %bb.0: -; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -10156,13 +10156,13 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SKX-SSE-LABEL: test_pmuludq: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmuludq: ; SKX: # %bb.0: -; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -14094,14 +14094,14 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] ; SKX-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_shufpd: ; SKX: # %bb.0: ; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] ; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_shufpd: @@ -14221,14 +14221,14 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:6.00] ; SKX-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:6.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtpd: ; SKX: # %bb.0: ; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:6.00] ; SKX-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:6.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_sqrtpd: @@ -14362,7 +14362,7 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SKX-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:6.00] ; SKX-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] ; SKX-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:6.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtsd: @@ -14370,7 +14370,7 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00] ; SKX-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] ; SKX-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_sqrtsd: @@ -14481,13 +14481,13 @@ define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_subpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subpd: ; SKX: # %bb.0: -; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -14589,13 +14589,13 @@ define double @test_subsd(double %a0, double %a1, double *%a2) { ; ; SKX-SSE-LABEL: test_subsd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subsd: ; SKX: # %bb.0: -; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -14956,14 +14956,14 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] ; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpckhpd: ; SKX: # %bb.0: ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] ; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_unpckhpd: @@ -15097,7 +15097,7 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SKX-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33] ; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; @@ -15105,7 +15105,7 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX: # %bb.0: ; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_unpcklpd: @@ -15228,14 +15228,14 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_xorpd: ; SKX: # %bb.0: ; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_xorpd: diff --git a/llvm/test/CodeGen/X86/sse3-schedule.ll b/llvm/test/CodeGen/X86/sse3-schedule.ll index ae656e5cd1c8..d8b0c45e8666 100644 --- a/llvm/test/CodeGen/X86/sse3-schedule.ll +++ b/llvm/test/CodeGen/X86/sse3-schedule.ll @@ -88,13 +88,13 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; ; SKX-SSE-LABEL: test_addsubpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addsubpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -197,13 +197,13 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; ; SKX-SSE-LABEL: test_addsubps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addsubps: ; SKX: # %bb.0: -; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -972,14 +972,14 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] ; SKX-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] -; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movddup: ; SKX: # %bb.0: ; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] ; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] -; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_movddup: @@ -1099,14 +1099,14 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] ; SKX-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movshdup: ; SKX: # %bb.0: ; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] ; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_movshdup: @@ -1226,14 +1226,14 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] ; SKX-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movsldup: ; SKX: # %bb.0: ; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] ; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_movsldup: diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll index 9cb7ecac8327..f38dd4aafa5d 100644 --- a/llvm/test/CodeGen/X86/sse41-schedule.ll +++ b/llvm/test/CodeGen/X86/sse41-schedule.ll @@ -92,14 +92,14 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKX-SSE-LABEL: test_blendpd: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendpd: ; SKX: # %bb.0: ; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -212,14 +212,14 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] ; SKX-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendps: ; SKX: # %bb.0: ; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] ; SKX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_blendps: @@ -2065,14 +2065,14 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) { ; ; SKX-SSE-LABEL: test_phminposuw: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00] +; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phminposuw: ; SKX: # %bb.0: -; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50] -; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00] +; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_phminposuw: @@ -4767,13 +4767,13 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SKX-SSE-LABEL: test_pmuldq: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmuldq: ; SKX: # %bb.0: -; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4871,14 +4871,14 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SKX-SSE-LABEL: test_pmulld: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:0.67] -; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:0.67] +; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00] +; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulld: ; SKX: # %bb.0: -; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:0.67] -; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:0.67] +; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00] +; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_pmulld: @@ -5153,16 +5153,16 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; ; SKX-SSE-LABEL: test_roundpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:0.67] -; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:0.67] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00] +; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundpd: ; SKX: # %bb.0: -; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:0.67] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00] +; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_roundpd: @@ -5275,16 +5275,16 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; ; SKX-SSE-LABEL: test_roundps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:0.67] -; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:0.67] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00] +; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundps: ; SKX: # %bb.0: -; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:0.67] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00] +; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_roundps: @@ -5402,16 +5402,16 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKX-SSE-LABEL: test_roundsd: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:0.67] -; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:0.67] -; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00] +; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundsd: ; SKX: # %bb.0: -; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] -; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00] +; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_roundsd: @@ -5531,16 +5531,16 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKX-SSE-LABEL: test_roundss: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:0.67] -; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:0.67] -; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00] +; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundss: ; SKX: # %bb.0: -; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] -; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67] -; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00] +; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00] +; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_roundss: diff --git a/llvm/test/CodeGen/X86/ssse3-schedule.ll b/llvm/test/CodeGen/X86/ssse3-schedule.ll index a0a0355fe34e..b10b1bb5c74c 100644 --- a/llvm/test/CodeGen/X86/ssse3-schedule.ll +++ b/llvm/test/CodeGen/X86/ssse3-schedule.ll @@ -1249,13 +1249,13 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SKX-SSE-LABEL: test_pmaddubsw: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaddubsw: ; SKX: # %bb.0: -; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -1359,13 +1359,13 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SKX-SSE-LABEL: test_pmulhrsw: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhrsw: ; SKX: # %bb.0: -; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s index 3eb5c3561782..fc28ed4288f8 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s @@ -1018,25 +1018,25 @@ vzeroupper # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 vaddpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaddpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vaddpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vaddpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vaddpd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vaddpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaddps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vaddps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vaddps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vaddsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaddsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vaddsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vaddss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaddss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vaddss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vaddsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaddsubpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vaddsubpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vaddsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vaddsubpd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vaddsubpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vaddsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaddsubps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vaddsubps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vaddsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vaddsubps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vaddsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 4 1.00 vaesdec %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 1.00 * vaesdec (%rax), %xmm1, %xmm2 @@ -1086,41 +1086,41 @@ vzeroupper # CHECK-NEXT: 1 7 0.50 * vbroadcastsd (%rax), %ymm2 # CHECK-NEXT: 1 6 0.50 * vbroadcastss (%rax), %xmm2 # CHECK-NEXT: 1 7 0.50 * vbroadcastss (%rax), %ymm2 -# CHECK-NEXT: 1 4 0.33 vcmppd $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vcmppd $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcmppd $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vcmppd $0, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vcmppd $0, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vcmppd $0, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vcmpps $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vcmpps $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcmpps $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vcmpps $0, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vcmpps $0, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vcmpps $0, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vcmpsd $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vcmpsd $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vcmpsd $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vcmpss $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vcmpss $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vcmpss $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 1.00 vcomisd %xmm0, %xmm1 # CHECK-NEXT: 2 7 1.00 * vcomisd (%rax), %xmm1 # CHECK-NEXT: 1 2 1.00 vcomiss %xmm0, %xmm1 # CHECK-NEXT: 2 7 1.00 * vcomiss (%rax), %xmm1 -# CHECK-NEXT: 2 5 1.00 vcvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 vcvtdq2pd %xmm0, %xmm2 # CHECK-NEXT: 3 11 1.00 * vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtdq2pd %xmm0, %ymm2 # CHECK-NEXT: 3 13 1.00 * vcvtdq2pd (%rax), %ymm2 -# CHECK-NEXT: 1 4 0.33 vcvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtdq2ps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 vcvtdq2ps %ymm0, %ymm2 +# CHECK-NEXT: 1 4 0.50 vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psy (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 vcvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: 3 8 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 vcvtps2dq %ymm0, %ymm2 +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm0, %ymm2 # CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm2 # CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * vcvtps2pd (%rax), %xmm2 @@ -1143,22 +1143,22 @@ vzeroupper # CHECK-NEXT: 2 5 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %ecx -# CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %rcx +# CHECK-NEXT: 3 7 1.00 vcvtss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %rcx # CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 vcvttps2dq %ymm0, %ymm2 +# CHECK-NEXT: 1 3 0.50 vcvttps2dq %ymm0, %ymm2 # CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm2 # CHECK-NEXT: 2 6 1.00 vcvttsd2si %xmm0, %ecx # CHECK-NEXT: 2 6 1.00 vcvttsd2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * vcvttsd2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvttsd2si (%rax), %rcx -# CHECK-NEXT: 3 7 1.00 vcvttss2si %xmm0, %ecx +# CHECK-NEXT: 2 6 1.00 vcvttss2si %xmm0, %ecx # CHECK-NEXT: 3 7 1.00 vcvttss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * vcvttss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvttss2si (%rax), %rcx @@ -1216,29 +1216,29 @@ vzeroupper # CHECK-NEXT: 2 8 0.50 * vmaskmovps (%rax), %ymm0, %ymm2 # CHECK-NEXT: 2 2 1.00 * * vmaskmovps %xmm0, %xmm1, (%rax) # CHECK-NEXT: 2 2 1.00 * * vmaskmovps %ymm0, %ymm1, (%rax) -# CHECK-NEXT: 1 4 0.33 vmaxpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmaxpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vmaxpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmaxpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vmaxpd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vmaxpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vmaxps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmaxps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vmaxps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmaxps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vmaxps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vmaxps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vmaxsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmaxsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vmaxsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmaxss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmaxss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vmaxss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vminpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vminpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vminpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vminpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vminpd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vminpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vminps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vminps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vminps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vminps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vminps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vminps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vminsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vminsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vminsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vminss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vminss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vminss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.33 vmovapd %xmm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * vmovapd %xmm0, (%rax) @@ -1327,17 +1327,17 @@ vzeroupper # CHECK-NEXT: 1 7 0.50 * vmovups (%rax), %ymm2 # CHECK-NEXT: 2 4 2.00 vmpsadbw $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 3 10 2.00 * vmpsadbw $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmulpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmulpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vmulpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmulpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vmulpd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vmulpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmulps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vmulps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vmulps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vmulps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vmulsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmulsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vmulsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmulss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmulss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vmulss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.33 vorpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 7 0.50 * vorpd (%rax), %xmm1, %xmm2 @@ -1441,8 +1441,8 @@ vzeroupper # CHECK-NEXT: 4 9 2.00 * vphaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 3 3 2.00 vphaddw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 4 9 2.00 * vphaddw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vphminposuw %xmm0, %xmm2 -# CHECK-NEXT: 2 10 0.50 * vphminposuw (%rax), %xmm2 +# CHECK-NEXT: 1 4 1.00 vphminposuw %xmm0, %xmm2 +# CHECK-NEXT: 2 10 1.00 * vphminposuw (%rax), %xmm2 # CHECK-NEXT: 3 3 2.00 vphsubd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 4 9 2.00 * vphsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 3 3 2.00 vphsubsw %xmm0, %xmm1, %xmm2 @@ -1457,9 +1457,9 @@ vzeroupper # CHECK-NEXT: 2 6 1.00 * vpinsrq $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 2 2.00 vpinsrw $1, %eax, %xmm1, %xmm2 # CHECK-NEXT: 2 6 1.00 * vpinsrw $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmaddubsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmaddubsw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmaddubsw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmaddwd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmaddwd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmaddwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.50 vpmaxsb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 7 0.50 * vpmaxsb (%rax), %xmm1, %xmm2 @@ -1510,19 +1510,19 @@ vzeroupper # CHECK-NEXT: 2 6 1.00 * vpmovzxwd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 vpmovzxwq %xmm0, %xmm2 # CHECK-NEXT: 2 6 1.00 * vpmovzxwq (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmuldq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmuldq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmuldq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmulhrsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmulhrsw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmulhrsw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmulhuw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmulhuw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmulhuw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmulhw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmulhw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmulhw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 10 0.67 vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 3 16 0.67 * vpmulld (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmullw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 10 1.00 vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 3 16 1.00 * vpmulld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmullw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmullw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmuludq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmuludq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmuludq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.33 vpor %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 7 0.50 * vpor (%rax), %xmm1, %xmm2 @@ -1612,18 +1612,18 @@ vzeroupper # CHECK-NEXT: 2 11 1.00 * vrcpps (%rax), %ymm2 # CHECK-NEXT: 1 4 1.00 vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 1.00 * vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 8 0.67 vroundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 3 14 0.67 * vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 2 8 0.67 vroundpd $1, %ymm0, %ymm2 -# CHECK-NEXT: 3 15 0.67 * vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: 2 8 0.67 vroundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 3 14 0.67 * vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: 2 8 0.67 vroundps $1, %ymm0, %ymm2 -# CHECK-NEXT: 3 15 0.67 * vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: 2 8 0.67 vroundsd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 3 14 0.67 * vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 8 0.67 vroundss $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 3 14 0.67 * vroundss $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 8 1.00 vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 14 1.00 * vroundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 2 8 1.00 vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: 3 15 1.00 * vroundpd $1, (%rax), %ymm2 +# CHECK-NEXT: 2 8 1.00 vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 14 1.00 * vroundps $1, (%rax), %xmm2 +# CHECK-NEXT: 2 8 1.00 vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: 3 15 1.00 * vroundps $1, (%rax), %ymm2 +# CHECK-NEXT: 2 8 1.00 vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 3 14 1.00 * vroundsd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 8 1.00 vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 3 14 1.00 * vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 4 1.00 vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: 2 10 1.00 * vrsqrtps (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 vrsqrtps %ymm0, %ymm2 @@ -1651,17 +1651,17 @@ vzeroupper # CHECK-NEXT: 1 12 3.00 vsqrtss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 17 3.00 * vsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 3 2 1.00 * * * vstmxcsr (%rax) -# CHECK-NEXT: 1 4 0.33 vsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vsubpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vsubpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vsubpd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vsubpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vsubps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vsubps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vsubps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vsubps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vsubsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vsubsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vsubsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vsubss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vsubss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vsubss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 1.00 vtestpd %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1 @@ -1716,30 +1716,30 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 123.00 271.00 170.00 171.17 171.17 34.00 376.00 5.00 12.67 +# CHECK-NEXT: - 123.00 290.83 198.83 171.17 171.17 34.00 327.33 5.00 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 1.00 - - - - - - - vaesdec %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vaesdec (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vaesdeclast %xmm0, %xmm1, %xmm2 @@ -1788,50 +1788,50 @@ vzeroupper # CHECK-NEXT: - - - - 0.50 0.50 - - - - vbroadcastsd (%rax), %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - - - - vbroadcastss (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - - - - vbroadcastss (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmppd $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmppd $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmppd $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmppd $0, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpps $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpps $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpps $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpps $0, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpsd $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpsd $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpss $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpss $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmppd $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmppd $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmppd $0, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmppd $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpps $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpps $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpps $0, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpps $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpsd $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpsd $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpss $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpss $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vcomisd %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomisd (%rax), %xmm1 # CHECK-NEXT: - - 1.00 - - - - - - - vcomiss %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomiss (%rax), %xmm1 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtdq2ps %ymm0, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psy (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtps2dq %ymm0, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm0, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %ymm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %ymm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtsd2si %xmm0, %ecx -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtsd2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtsd2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtsd2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsd2ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtsd2ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sdl %ecx, %xmm0, %xmm2 @@ -1844,26 +1844,26 @@ vzeroupper # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssq (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtss2sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtss2si %xmm0, %ecx -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtss2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtss2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtss2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtss2si %xmm0, %ecx +# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvtss2si %xmm0, %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %rcx # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvttps2dq %ymm0, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %ymm0, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %ymm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvttsd2si %xmm0, %ecx -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvttsd2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttsd2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttsd2si (%rax), %rcx -# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvttss2si %xmm0, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttsd2si %xmm0, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttsd2si %xmm0, %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttsd2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttsd2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttss2si %xmm0, %ecx # CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvttss2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttss2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttss2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttss2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttss2si (%rax), %rcx # CHECK-NEXT: - 3.00 1.00 - - - - - - - vdivpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - vdivpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 5.00 1.00 - - - - - - - vdivpd %ymm0, %ymm1, %ymm2 @@ -1918,30 +1918,30 @@ vzeroupper # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaskmovps (%rax), %ymm0, %ymm2 # CHECK-NEXT: - - 1.00 - 0.33 0.33 - - - 0.33 vmaskmovps %xmm0, %xmm1, (%rax) # CHECK-NEXT: - - 1.00 - 0.33 0.33 - - - 0.33 vmaskmovps %ymm0, %ymm1, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovapd (%rax), %xmm2 @@ -2029,18 +2029,18 @@ vzeroupper # CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovups (%rax), %ymm2 # CHECK-NEXT: - - - - - - - 2.00 - - vmpsadbw $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmpsadbw $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vorpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vorpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vorpd %ymm0, %ymm1, %ymm2 @@ -2143,8 +2143,8 @@ vzeroupper # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vphaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphaddw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphaddw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vphminposuw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vphminposuw (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 - - - - - - - vphminposuw %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vphminposuw (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphsubd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vphsubsw %xmm0, %xmm1, %xmm2 @@ -2159,10 +2159,10 @@ vzeroupper # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpinsrq $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - 2.00 - - vpinsrw $1, %eax, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpinsrw $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddubsw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddubsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddwd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaxsb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaxsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaxsd %xmm0, %xmm1, %xmm2 @@ -2212,20 +2212,20 @@ vzeroupper # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxwq %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwq (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuldq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuldq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhrsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhrsw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhuw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vpmulld (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmullw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmullw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuludq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuludq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuldq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhrsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhrsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhuw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmullw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmullw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuludq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuludq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpor %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpor (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - vpsadbw %xmm0, %xmm1, %xmm2 @@ -2314,18 +2314,18 @@ vzeroupper # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrcpps (%rax), %ymm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundpd $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundps $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundps $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundsd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundss $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundss $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundpd $1, (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundps $1, (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundps $1, (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundsd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrsqrtps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrsqrtps %ymm0, %ymm2 @@ -2353,18 +2353,18 @@ vzeroupper # CHECK-NEXT: - 3.00 1.00 - - - - - - - vsqrtss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - vsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 vstmxcsr (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vtestpd %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vtestpd (%rax), %xmm1 # CHECK-NEXT: - - 1.00 - - - - - - - vtestpd %ymm0, %ymm1 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s index 5c626f26e65b..a50febf70bd0 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s @@ -583,9 +583,9 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: 4 10 2.00 * vphsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 3 3 2.00 vphsubw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 4 10 2.00 * vphsubw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmaddubsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmaddubsw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmaddubsw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmaddwd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmaddwd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmaddwd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 2 7 0.50 * vpmaskmovd (%rax), %xmm0, %xmm2 # CHECK-NEXT: 2 8 0.50 * vpmaskmovd (%rax), %ymm0, %ymm2 @@ -644,19 +644,19 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: 2 9 1.00 * vpmovzxwd (%rax), %ymm2 # CHECK-NEXT: 1 3 1.00 vpmovzxwq %xmm0, %ymm2 # CHECK-NEXT: 2 10 1.00 * vpmovzxwq (%rax), %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmuldq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmuldq %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmuldq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmulhrsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmulhrsw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmulhrsw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmulhuw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmulhuw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmulhuw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmulhw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmulhw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmulhw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 2 10 0.67 vpmulld %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 3 17 0.67 * vpmulld (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmullw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 10 1.00 vpmulld %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 3 17 1.00 * vpmulld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmullw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmullw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmuludq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmuludq %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmuludq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.33 vpor %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpor (%rax), %ymm1, %ymm2 @@ -771,7 +771,7 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 107.00 86.00 85.17 85.17 1.00 169.00 - 1.67 +# CHECK-NEXT: - - 110.33 89.33 85.17 85.17 1.00 162.33 - 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -901,10 +901,10 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vphsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphsubw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphsubw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddubsw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddubsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddwd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddwd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaskmovd (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaskmovd (%rax), %ymm0, %ymm2 # CHECK-NEXT: - - 1.00 - 0.33 0.33 - - - 0.33 vpmaskmovd %xmm0, %xmm1, (%rax) @@ -962,20 +962,20 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxwq %xmm0, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwq (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuldq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuldq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhrsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhrsw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhuw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vpmulld %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vpmulld (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmullw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmullw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuludq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuludq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuldq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhrsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhrsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhuw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vpmulld %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmullw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmullw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuludq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuludq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpor %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpor (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - vpsadbw %ymm0, %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s index 71ffbbbe13f4..0da66b3e1a3d 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s @@ -45,15 +45,15 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 2.67 2.67 1.67 1.67 2.00 8.67 - 0.67 +# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.33 0.33 1.00 1.33 - 0.33 vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.33 0.33 1.00 1.33 - 0.33 vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %ymm0, (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s index ade33fb10337..a3a405f3ac19 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s @@ -298,197 +298,197 @@ vfnmsub231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 vfmadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmadd132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd132sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmadd132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd213sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmadd213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd231sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmadd231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd132ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmadd132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd213ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmadd213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd231ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmadd231ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub132pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmaddsub132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub132pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmaddsub132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub213pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmaddsub213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub213pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmaddsub213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub231pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmaddsub231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub231pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmaddsub231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub132ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmaddsub132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub132ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmaddsub132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub213ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmaddsub213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub213ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmaddsub213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub231ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmaddsub231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub231ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmaddsub231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub132pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsub132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsub132pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsub132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub213pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsub213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsub213pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsub213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub231pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsub231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsub231pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsub231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub132ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsub132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsub132ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsub132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub213ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsub213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsub213ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsub213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub231ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsub231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsub231ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsub231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub132sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmsub132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub213sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmsub213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub231sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmsub231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub132ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmsub132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub213ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmsub213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub231ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmsub231ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd132pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsubadd132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd132pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsubadd132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd213pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsubadd213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd213pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsubadd213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd231pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsubadd231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd231pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsubadd231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd132ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsubadd132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd132ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsubadd132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd213ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsubadd213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd213ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsubadd213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd231ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsubadd231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd231ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsubadd231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd132pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmadd132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd132pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmadd132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd213pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmadd213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd213pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmadd213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd231pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmadd231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd231pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmadd231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd132ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmadd132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd132ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmadd132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd213ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmadd213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd213ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmadd213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd231ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmadd231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd231ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmadd231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd132sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmadd132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd213sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmadd213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd231sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmadd231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd132ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmadd132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd213ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmadd213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd231ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmadd231ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub132pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmsub132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub132pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmsub132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub213pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmsub213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub213pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmsub213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub231pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmsub231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub231pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmsub231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub132ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmsub132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub132ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmsub132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub213ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmsub213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub213ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmsub213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub231ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmsub231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub231ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmsub231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub132sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmsub132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub213sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmsub213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub231sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmsub231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub132ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmsub132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub213ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmsub213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub231ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmsub231ss (%rax), %xmm1, %xmm2 # CHECK: Resources: @@ -505,199 +505,199 @@ vfnmsub231ss (%rax), %xmm1, %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 64.00 64.00 48.00 48.00 - 64.00 - - +# CHECK-NEXT: - - 96.00 96.00 48.00 48.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231ss (%rax), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s index a89aba469035..9088a7705308 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s @@ -194,17 +194,17 @@ xorps (%rax), %xmm2 # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 addps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 addps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * addps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 addss %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 addss %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * addss (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 andnps %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * andnps (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 andps %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * andps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 cmpps $0, %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cmpps $0, %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * cmpps $0, (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 cmpss $0, %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cmpss $0, %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * cmpss $0, (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 comiss %xmm0, %xmm1 # CHECK-NEXT: 2 7 1.00 * comiss (%rax), %xmm1 @@ -217,12 +217,12 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %ecx -# CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %rcx +# CHECK-NEXT: 3 7 1.00 cvtss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %rcx # CHECK-NEXT: 2 5 1.00 cvttps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 0.50 * cvttps2pi (%rax), %mm2 -# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %ecx +# CHECK-NEXT: 2 6 1.00 cvttss2si %xmm0, %ecx # CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %ecx # CHECK-NEXT: 4 12 1.00 * cvttss2si (%rax), %rcx @@ -232,13 +232,13 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 16 3.00 * divss (%rax), %xmm2 # CHECK-NEXT: 3 7 1.00 * * * ldmxcsr (%rax) # CHECK-NEXT: 1 1 1.00 * * * maskmovq %mm0, %mm1 -# CHECK-NEXT: 1 4 0.33 maxps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 maxps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * maxps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 maxss %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 maxss %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * maxss (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 minps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 minps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * minps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 minss %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 minss %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * minss (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 movaps %xmm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * movaps %xmm0, (%rax) @@ -258,9 +258,9 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 movups %xmm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * movups %xmm0, (%rax) # CHECK-NEXT: 1 6 0.50 * movups (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 mulps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 mulps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * mulps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 mulss %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 mulss %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * mulss (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 orps %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * orps (%rax), %xmm2 @@ -306,9 +306,9 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 12 3.00 sqrtss %xmm0, %xmm2 # CHECK-NEXT: 2 17 3.00 * sqrtss (%rax), %xmm2 # CHECK-NEXT: 3 2 1.00 * * * stmxcsr (%rax) -# CHECK-NEXT: 1 4 0.33 subps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 subps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * subps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 subss %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 subss %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * subss (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 ucomiss %xmm0, %xmm1 # CHECK-NEXT: 2 7 1.00 * ucomiss (%rax), %xmm1 @@ -333,25 +333,25 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 26.00 65.50 18.50 32.00 32.00 8.00 45.50 0.50 3.00 +# CHECK-NEXT: - 26.00 65.83 25.83 32.00 32.00 8.00 37.83 0.50 3.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - addps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - addss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andnps %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andnps (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andps %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmpps $0, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmpps $0, (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmpss $0, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmpss $0, (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmpps $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpps $0, (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmpss $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpss $0, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - comiss %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comiss (%rax), %xmm1 -# CHECK-NEXT: - - 2.00 - - - - - - - cvtpi2ps %mm0, %xmm2 +# CHECK-NEXT: - - - - - - - 2.00 - - cvtpi2ps %mm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtps2pi (%rax), %mm2 @@ -359,15 +359,15 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - cvtsi2ssq %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtss2si %xmm0, %ecx -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtss2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtss2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtss2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtss2si %xmm0, %ecx +# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvtss2si %xmm0, %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtss2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtss2si (%rax), %rcx # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttps2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvttps2pi (%rax), %mm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvttss2si %xmm0, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttss2si %xmm0, %ecx # CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvttss2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvttss2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttss2si (%rax), %ecx # CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 1.33 - - cvttss2si (%rax), %rcx # CHECK-NEXT: - 3.00 1.00 - - - - - - - divps %xmm0, %xmm2 # CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - divps (%rax), %xmm2 @@ -375,14 +375,14 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - divss (%rax), %xmm2 # CHECK-NEXT: - - 1.25 0.25 0.50 0.50 - 0.25 0.25 - ldmxcsr (%rax) # CHECK-NEXT: - - - - - - - 1.00 - - maskmovq %mm0, %mm1 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxss (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - minps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - minss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movaps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movaps %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 0.50 - - - - movaps (%rax), %xmm2 @@ -401,10 +401,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movups %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movups %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 0.50 - - - - movups (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - orps %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - orps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - pavgb %mm0, %mm2 @@ -449,10 +449,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - 3.00 1.00 - - - - - - - sqrtss %xmm0, %xmm2 # CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - sqrtss (%rax), %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 stmxcsr (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - subps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - subss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subss (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - ucomiss %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - ucomiss (%rax), %xmm1 # CHECK-NEXT: - - - - - - - 1.00 - - unpckhps %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s index dddf1119dd49..d39f7c0dd9d2 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s @@ -402,24 +402,24 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 addpd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 addpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * addpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 addsd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 addsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * addsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 andnpd %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 andpd %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * andpd (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 * * * clflush (%rax) -# CHECK-NEXT: 1 4 0.33 cmppd $0, %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cmppd $0, %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * cmppd $0, (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 cmpsd $0, %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cmpsd $0, %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * cmpsd $0, (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 comisd %xmm0, %xmm1 # CHECK-NEXT: 2 7 1.00 * comisd (%rax), %xmm1 -# CHECK-NEXT: 2 5 1.00 cvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cvtdq2pd %xmm0, %xmm2 # CHECK-NEXT: 3 11 1.00 * cvtdq2pd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 cvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtpd2dq %xmm0, %xmm2 # CHECK-NEXT: 3 11 1.00 * cvtpd2dq (%rax), %xmm2 @@ -427,9 +427,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 3 11 1.00 * cvtpd2pi (%rax), %mm2 # CHECK-NEXT: 2 5 1.00 cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 3 11 1.00 * cvtpd2ps (%rax), %xmm2 -# CHECK-NEXT: 2 5 1.00 cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: 3 10 1.00 * cvtpi2pd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 cvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cvtpi2pd %mm0, %xmm2 +# CHECK-NEXT: 2 9 0.50 * cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 1 4 0.50 cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * cvtps2pd (%rax), %xmm2 @@ -449,7 +449,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 3 11 1.00 * cvttpd2dq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvttpd2pi %xmm0, %mm2 # CHECK-NEXT: 3 11 1.00 * cvttpd2pi (%rax), %mm2 -# CHECK-NEXT: 1 4 0.33 cvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cvttps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 cvttsd2si %xmm0, %ecx # CHECK-NEXT: 2 6 1.00 cvttsd2si %xmm0, %rcx @@ -461,13 +461,13 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 19 4.00 * divsd (%rax), %xmm2 # CHECK-NEXT: 2 2 0.50 * * * lfence # CHECK-NEXT: 2 1 1.00 * * * maskmovdqu %xmm0, %xmm1 -# CHECK-NEXT: 1 4 0.33 maxpd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 maxpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * maxpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 maxsd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 maxsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * maxsd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 minpd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 minpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * minpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 minsd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 minsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * minsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 movapd %xmm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * movapd %xmm0, (%rax) @@ -504,9 +504,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 movupd %xmm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * movupd %xmm0, (%rax) # CHECK-NEXT: 1 6 0.50 * movupd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 mulpd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 mulpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * mulpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 mulsd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 mulsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * mulsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 orpd %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * orpd (%rax), %xmm2 @@ -555,7 +555,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 pcmpgtw %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * pcmpgtw (%rax), %xmm2 # CHECK-NEXT: 2 3 1.00 pextrw $1, %xmm0, %ecx -# CHECK-NEXT: 1 4 0.33 pmaddwd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmaddwd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmaddwd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * pmaxsw (%rax), %xmm2 @@ -566,15 +566,15 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * pminub (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 pmovmskb %xmm0, %ecx -# CHECK-NEXT: 1 4 0.33 pmulhuw %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmulhuw %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmulhuw (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 pmulhw %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmulhw %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmulhw (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 pmullw %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmullw %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmullw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmuludq %mm0, %mm2 # CHECK-NEXT: 2 9 1.00 * pmuludq (%rax), %mm2 -# CHECK-NEXT: 1 4 0.33 pmuludq %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmuludq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmuludq (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 por %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * por (%rax), %xmm2 @@ -654,9 +654,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 24 6.00 * sqrtpd (%rax), %xmm2 # CHECK-NEXT: 1 18 6.00 sqrtsd %xmm0, %xmm2 # CHECK-NEXT: 2 23 6.00 * sqrtsd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 subpd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 subpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * subpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 subsd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 subsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * subsd (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 ucomisd %xmm0, %xmm1 # CHECK-NEXT: 2 7 1.00 * ucomisd (%rax), %xmm1 @@ -681,28 +681,28 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 38.00 101.08 69.08 62.67 62.67 14.00 112.08 1.75 4.67 +# CHECK-NEXT: - 38.00 103.08 82.08 62.67 62.67 14.00 94.08 1.75 4.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addpd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addsd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - addpd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andnpd %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andnpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andpd %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andpd (%rax), %xmm2 # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 1.25 - clflush (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmppd $0, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmppd $0, (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmpsd $0, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmpsd $0, (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmppd $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmppd $0, (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmpsd $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpsd $0, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - comisd %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comisd (%rax), %xmm1 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2dq (%rax), %xmm2 @@ -710,16 +710,16 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2pi (%rax), %mm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2ps (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtpi2pd %mm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtps2pd (%rax), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtsd2si %xmm0, %ecx -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtsd2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtsd2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtsd2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtsd2si %xmm0, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtsd2si %xmm0, %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtsd2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtsd2si (%rax), %rcx # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sdl %ecx, %xmm2 @@ -732,26 +732,26 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvttpd2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttpd2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvttpd2pi (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvttps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvttps2dq (%rax), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvttsd2si %xmm0, %ecx -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvttsd2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvttsd2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvttsd2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttsd2si %xmm0, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttsd2si %xmm0, %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttsd2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttsd2si (%rax), %rcx # CHECK-NEXT: - 3.00 1.00 - - - - - - - divpd %xmm0, %xmm2 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - divpd (%rax), %xmm2 # CHECK-NEXT: - 3.00 1.00 - - - - - - - divsd %xmm0, %xmm2 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - divsd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - lfence # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 maskmovdqu %xmm0, %xmm1 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxpd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxsd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxsd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minpd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minsd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxpd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxsd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - minpd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - minsd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minsd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movapd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movapd %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 0.50 - - - - movapd (%rax), %xmm2 @@ -787,10 +787,10 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movupd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movupd %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 0.50 - - - - movupd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulpd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulsd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulpd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulsd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulsd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - orpd %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - orpd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - packssdw %xmm0, %xmm2 @@ -838,8 +838,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - pcmpgtw %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pcmpgtw (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - pextrw $1, %xmm0, %ecx -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmaddwd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmaddwd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaddwd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmaddwd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaxsw %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmaxsw (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaxub %xmm0, %xmm2 @@ -849,16 +849,16 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - pminub %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pminub (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - pmovmskb %xmm0, %ecx -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmulhuw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmulhuw (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmulhw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmulhw (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmullw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmullw (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmulhuw %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmulhuw (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmulhw %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmulhw (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmullw %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmullw (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - pmuludq %mm0, %mm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pmuludq (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmuludq %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmuludq (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmuludq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmuludq (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - por %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - por (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - psadbw %xmm0, %xmm2 @@ -937,10 +937,10 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - 6.00 1.00 - 0.50 0.50 - - - - sqrtpd (%rax), %xmm2 # CHECK-NEXT: - 6.00 1.00 - - - - - - - sqrtsd %xmm0, %xmm2 # CHECK-NEXT: - 6.00 1.00 - 0.50 0.50 - - - - sqrtsd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subpd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subsd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - subpd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - subsd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subsd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - ucomisd %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - ucomisd (%rax), %xmm1 # CHECK-NEXT: - - - - - - - 1.00 - - unpckhpd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s index 5367f880fc88..4fac1f6a504a 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s @@ -39,9 +39,9 @@ movsldup (%rax), %xmm2 # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 addsubpd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 addsubpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * addsubpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 addsubps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 addsubps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * addsubps (%rax), %xmm2 # CHECK-NEXT: 3 6 2.00 haddpd %xmm0, %xmm2 # CHECK-NEXT: 4 12 2.00 * haddpd (%rax), %xmm2 @@ -73,14 +73,14 @@ movsldup (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 4.00 4.00 5.00 5.00 - 23.00 - - +# CHECK-NEXT: - - 4.67 4.67 5.00 5.00 - 21.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addsubpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addsubpd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addsubps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addsubps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsubpd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsubpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsubps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsubps (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - haddpd %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - haddpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - haddps %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s index 0d39529338e8..d49f33cce151 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s @@ -189,8 +189,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 2 3 1.00 pextrq $1, %xmm0, %rcx # CHECK-NEXT: 3 2 1.00 * pextrq $1, %xmm0, (%rax) # CHECK-NEXT: 3 2 1.00 * pextrw $1, %xmm0, (%rax) -# CHECK-NEXT: 1 4 0.33 phminposuw %xmm0, %xmm2 -# CHECK-NEXT: 2 10 0.50 * phminposuw (%rax), %xmm2 +# CHECK-NEXT: 1 4 1.00 phminposuw %xmm0, %xmm2 +# CHECK-NEXT: 2 10 1.00 * phminposuw (%rax), %xmm2 # CHECK-NEXT: 2 2 2.00 pinsrb $1, %eax, %xmm1 # CHECK-NEXT: 2 6 1.00 * pinsrb $1, (%rax), %xmm1 # CHECK-NEXT: 2 2 2.00 pinsrd $1, %eax, %xmm1 @@ -237,20 +237,20 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 * pmovzxwd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pmovzxwq %xmm0, %xmm2 # CHECK-NEXT: 2 6 1.00 * pmovzxwq (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 pmuldq %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmuldq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmuldq (%rax), %xmm2 -# CHECK-NEXT: 2 10 0.67 pmulld %xmm0, %xmm2 -# CHECK-NEXT: 3 16 0.67 * pmulld (%rax), %xmm2 +# CHECK-NEXT: 2 10 1.00 pmulld %xmm0, %xmm2 +# CHECK-NEXT: 3 16 1.00 * pmulld (%rax), %xmm2 # CHECK-NEXT: 2 3 1.00 ptest %xmm0, %xmm1 # CHECK-NEXT: 3 9 1.00 * ptest (%rax), %xmm1 -# CHECK-NEXT: 2 8 0.67 roundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 3 14 0.67 * roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 2 8 0.67 roundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 3 14 0.67 * roundps $1, (%rax), %xmm2 -# CHECK-NEXT: 2 8 0.67 roundsd $1, %xmm0, %xmm2 -# CHECK-NEXT: 3 14 0.67 * roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: 2 8 0.67 roundss $1, %xmm0, %xmm2 -# CHECK-NEXT: 3 14 0.67 * roundss $1, (%rax), %xmm2 +# CHECK-NEXT: 2 8 1.00 roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 14 1.00 * roundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 2 8 1.00 roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 14 1.00 * roundps $1, (%rax), %xmm2 +# CHECK-NEXT: 2 8 1.00 roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 14 1.00 * roundsd $1, (%rax), %xmm2 +# CHECK-NEXT: 2 8 1.00 roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 14 1.00 * roundss $1, (%rax), %xmm2 # CHECK: Resources: # CHECK-NEXT: [0] - SKXDivider @@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 31.67 25.67 23.67 23.67 5.00 74.67 - 1.67 +# CHECK-NEXT: - - 36.67 28.67 23.67 23.67 5.00 66.67 - 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -304,8 +304,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - pextrq $1, %xmm0, %rcx # CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 pextrq $1, %xmm0, (%rax) # CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 pextrw $1, %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - phminposuw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - phminposuw (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 - - - - - - - phminposuw %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - phminposuw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 2.00 - - pinsrb $1, %eax, %xmm1 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pinsrb $1, (%rax), %xmm1 # CHECK-NEXT: - - - - - - - 2.00 - - pinsrd $1, %eax, %xmm1 @@ -352,17 +352,17 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pmovzxwd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - pmovzxwq %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pmovzxwq (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmuldq %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmuldq (%rax), %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - pmulld %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - pmulld (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmuldq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmuldq (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - pmulld %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - pmulld (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - ptest %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - ptest (%rax), %xmm1 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundps $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundsd $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundss $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundss $1, (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundpd $1, (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundps $1, (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundsd $1, (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundss $1, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s index b4db1a2b6499..a213d059e601 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s @@ -148,11 +148,11 @@ psignw (%rax), %xmm2 # CHECK-NEXT: 4 9 2.00 * phsubw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2 # CHECK-NEXT: 2 9 1.00 * pmaddubsw (%rax), %mm2 -# CHECK-NEXT: 1 4 0.33 pmaddubsw %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmaddubsw %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmaddubsw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmulhrsw %mm0, %mm2 # CHECK-NEXT: 2 9 1.00 * pmulhrsw (%rax), %mm2 -# CHECK-NEXT: 1 4 0.33 pmulhrsw %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmulhrsw %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmulhrsw (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pshufb %mm0, %mm2 # CHECK-NEXT: 2 6 1.00 * pshufb (%rax), %mm2 @@ -185,7 +185,7 @@ psignw (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 30.00 12.00 16.00 16.00 - 70.00 - - +# CHECK-NEXT: - - 30.67 12.67 16.00 16.00 - 68.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -231,12 +231,12 @@ psignw (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - phsubw (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - pmaddubsw %mm0, %mm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pmaddubsw (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmaddubsw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmaddubsw (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaddubsw %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmaddubsw (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - pmulhrsw %mm0, %mm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pmulhrsw (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmulhrsw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmulhrsw (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmulhrsw %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmulhrsw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - pshufb %mm0, %mm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pshufb (%rax), %mm2 # CHECK-NEXT: - - - - - - - 1.00 - - pshufb %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s index 11d6a5696050..fd5823e01a36 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s @@ -822,13 +822,13 @@ xorq (%rax), %rdi # CHECK-NEXT: 3 7 1.00 * * negl (%rax) # CHECK-NEXT: 1 1 0.25 negq %rcx # CHECK-NEXT: 3 7 1.00 * * negq (%r10) -# CHECK-NEXT: 1 1 0.25 nop -# CHECK-NEXT: 1 1 0.25 nopw %di -# CHECK-NEXT: 1 1 0.25 nopw (%rcx) -# CHECK-NEXT: 1 1 0.25 nopl %esi -# CHECK-NEXT: 1 1 0.25 nopl (%r8) -# CHECK-NEXT: 1 1 0.25 nopq %rdx -# CHECK-NEXT: 1 1 0.25 nopq (%r9) +# CHECK-NEXT: 1 1 0.17 nop +# CHECK-NEXT: 1 1 0.17 nopw %di +# CHECK-NEXT: 1 1 0.17 nopw (%rcx) +# CHECK-NEXT: 1 1 0.17 nopl %esi +# CHECK-NEXT: 1 1 0.17 nopl (%r8) +# CHECK-NEXT: 1 1 0.17 nopq %rdx +# CHECK-NEXT: 1 1 0.17 nopq (%r9) # CHECK-NEXT: 1 1 0.25 notb %dil # CHECK-NEXT: 3 7 1.00 * * notb (%r8) # CHECK-NEXT: 1 1 0.25 notw %si @@ -1164,7 +1164,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: 60.00 - 431.50 225.50 202.00 202.00 167.00 186.00 416.00 69.00 +# CHECK-NEXT: 60.00 - 429.75 223.75 202.00 202.00 167.00 184.25 414.25 69.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1381,13 +1381,13 @@ xorq (%rax), %rdi # CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 negl (%rax) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - negq %rcx # CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 negq (%r10) -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nop -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopw %di -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopw (%rcx) -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopl %esi -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopl (%r8) -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopq %rdx -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopq (%r9) +# CHECK-NEXT: - - - - - - - - - - nop +# CHECK-NEXT: - - - - - - - - - - nopw %di +# CHECK-NEXT: - - - - - - - - - - nopw (%rcx) +# CHECK-NEXT: - - - - - - - - - - nopl %esi +# CHECK-NEXT: - - - - - - - - - - nopl (%r8) +# CHECK-NEXT: - - - - - - - - - - nopq %rdx +# CHECK-NEXT: - - - - - - - - - - nopq (%r9) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - notb %dil # CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 notb (%r8) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - notw %si