From 1233e1234ab8466b7997cc0a50683f85061b5e64 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 May 2018 20:52:53 +0000 Subject: [PATCH] [X86] Split WriteFAdd/WriteFCmp/WriteFMul schedule classes Split to support single/double for scalar, XMM and YMM/ZMM instructions - removing InstrRW overrides for these instructions. Fixes Atom ADDSUBPD instruction and reclassifies VFPCLASS as WriteFCmp which is closer in behaviour. llvm-svn: 331672 --- llvm/lib/Target/X86/X86InstrAVX512.td | 3 +- llvm/lib/Target/X86/X86InstrSSE.td | 40 ++++++------- llvm/lib/Target/X86/X86SchedBroadwell.td | 29 +++++++--- llvm/lib/Target/X86/X86SchedHaswell.td | 49 ++++++++-------- llvm/lib/Target/X86/X86SchedSandyBridge.td | 29 +++++++--- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 47 +++++++-------- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 43 +++++++------- llvm/lib/Target/X86/X86Schedule.td | 57 ++++++++++++------- llvm/lib/Target/X86/X86ScheduleAtom.td | 28 +++++---- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 38 ++++--------- llvm/lib/Target/X86/X86ScheduleSLM.td | 26 ++++++--- llvm/lib/Target/X86/X86ScheduleZnver1.td | 12 ++++ llvm/test/CodeGen/X86/sse3-schedule.ll | 2 +- .../tools/llvm-mca/X86/Atom/resources-sse2.s | 6 +- .../tools/llvm-mca/X86/Atom/resources-sse3.s | 6 +- 15 files changed, 236 insertions(+), 179 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 94065dec4a88..958ec92e38c3 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2672,9 +2672,8 @@ multiclass avx512_fp_fpclass_all opcVec, EVEX_CD8<64, CD8VT1>, VEX_W; } -// FIXME: Is there a better scheduler class for VFPCLASS? defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass, - X86Vfpclasss, SchedWriteFAdd, HasDQI>, + X86Vfpclasss, SchedWriteFCmp, HasDQI>, AVX512AIi8Base, EVEX; //----------------------------------------------------------------- diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index fb422a740800..63f791ecc8c4 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1854,12 +1854,12 @@ let ExeDomain = SSEPackedSingle in defm VCMPSS : sse12_cmp_scalar, XS, VEX_4V, VEX_LIG, VEX_WIG; + SchedWriteFCmpSizes.PS.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG; let ExeDomain = SSEPackedDouble in defm VCMPSD : sse12_cmp_scalar, // same latency as 32 bit compare + SchedWriteFCmpSizes.PD.Scl>, XD, VEX_4V, VEX_LIG, VEX_WIG; let Constraints = "$src1 = $dst" in { @@ -1867,12 +1867,12 @@ let Constraints = "$src1 = $dst" in { defm CMPSS : sse12_cmp_scalar, XS; + SchedWriteFCmpSizes.PS.Scl>, XS; let ExeDomain = SSEPackedDouble in defm CMPSD : sse12_cmp_scalar, XD; + SchedWriteFCmpSizes.PD.Scl>, XD; } multiclass sse12_cmp_scalar_int, XS, VEX_4V; + SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS, VEX_4V; let ExeDomain = SSEPackedDouble in defm VCMPSD : sse12_cmp_scalar_int, // same latency as f32 + SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD, VEX_4V; let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in defm CMPSS : sse12_cmp_scalar_int, XS; + SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS; let ExeDomain = SSEPackedDouble in defm CMPSD : sse12_cmp_scalar_int, XD; + SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD; } } @@ -2030,28 +2030,28 @@ multiclass sse12_cmp_packed, PS, VEX_4V, VEX_WIG; + SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG; defm VCMPPD : sse12_cmp_packed, PD, VEX_4V, VEX_WIG; + SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG; defm VCMPPSY : sse12_cmp_packed, PS, VEX_4V, VEX_L, VEX_WIG; + SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG; defm VCMPPDY : sse12_cmp_packed, PD, VEX_4V, VEX_L, VEX_WIG; + SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed, PS; + SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS; defm CMPPD : sse12_cmp_packed, PD; + SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD; } def CommutableCMPCC : PatLeaf<(imm), [{ @@ -4438,28 +4438,28 @@ multiclass sse3_addsub, + SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>, XD, VEX_4V, VEX_WIG; defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem, - SchedWriteFAdd.YMM, loadv8f32, 0>, + SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>, XD, VEX_4V, VEX_L, VEX_WIG; } let ExeDomain = SSEPackedDouble in { defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem, - SchedWriteFAdd.XMM, loadv2f64, 0>, + SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>, PD, VEX_4V, VEX_WIG; defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem, - SchedWriteFAdd.YMM, loadv4f64, 0>, + SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>, PD, VEX_4V, VEX_L, VEX_WIG; } } let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { let ExeDomain = SSEPackedSingle in defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem, - SchedWriteFAdd.XMM, memopv4f32>, XD; + SchedWriteFAddSizes.PS.XMM, memopv4f32>, XD; let ExeDomain = SSEPackedDouble in defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem, - SchedWriteFAdd.XMM, memopv2f64>, PD; + SchedWriteFAddSizes.PD.XMM, memopv2f64>, PD; } //===---------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 4c2aef0099b6..036cfa54162f 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -155,13 +155,28 @@ def : WriteRes { let Latency = 5; } def : WriteRes; def : WriteRes; -defm : BWWriteResPair; // Floating point add/sub. -defm : BWWriteResPair; // Floating point add/sub (YMM/ZMM). -defm : BWWriteResPair; // Floating point compare. -defm : BWWriteResPair; // Floating point compare (YMM/ZMM). -defm : BWWriteResPair; // Floating point compare to flags. -defm : BWWriteResPair; // Floating point multiplication. -defm : BWWriteResPair; // Floating point multiplication (YMM/ZMM). +defm : BWWriteResPair; // Floating point add/sub. +defm : BWWriteResPair; // Floating point add/sub (XMM). +defm : BWWriteResPair; // Floating point add/sub (YMM/ZMM). +defm : BWWriteResPair; // Floating point double add/sub. +defm : BWWriteResPair; // Floating point double add/sub (XMM). +defm : BWWriteResPair; // Floating point double add/sub (YMM/ZMM). + +defm : BWWriteResPair; // Floating point compare. +defm : BWWriteResPair; // Floating point compare (XMM). +defm : BWWriteResPair; // Floating point compare (YMM/ZMM). +defm : BWWriteResPair; // Floating point double compare. +defm : BWWriteResPair; // Floating point double compare (XMM). +defm : BWWriteResPair; // Floating point double compare (YMM/ZMM). + +defm : BWWriteResPair; // Floating point compare to flags. + +defm : BWWriteResPair; // Floating point multiplication. +defm : BWWriteResPair; // Floating point multiplication (XMM). +defm : BWWriteResPair; // Floating point multiplication (YMM/ZMM). +defm : BWWriteResPair; // Floating point double multiplication. +defm : BWWriteResPair; // Floating point double multiplication (XMM). +defm : BWWriteResPair; // Floating point double multiplication (YMM/ZMM). //defm : BWWriteResPair; // Floating point division. defm : BWWriteResPair; // Floating point division (XMM). diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index b60c0d69af55..439ca126dfef 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -149,13 +149,28 @@ def : WriteRes { let Latency = 5; } def : WriteRes; defm : X86WriteRes; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; + +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; + +defm : HWWriteResPair; + +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -882,17 +897,7 @@ def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> { } def: InstRW<[HWWriteResGroup12], (instregex "MMX_CVTPI2PSirm", "PDEP(32|64)rm", - "PEXT(32|64)rm", - "(V?)ADDSDrm", - "(V?)ADDSSrm", - "(V?)CMPSDrm", - "(V?)CMPSSrm", - "(V?)MAX(C?)SDrm", - "(V?)MAX(C?)SSrm", - "(V?)MIN(C?)SDrm", - "(V?)MIN(C?)SSrm", - "(V?)SUBSDrm", - "(V?)SUBSSrm")>; + "PEXT(32|64)rm")>; def HWWriteResGroup12_1 : SchedWriteRes<[HWPort1,HWPort0156,HWPort23]> { let Latency = 8; @@ -1658,14 +1663,6 @@ def HWWriteResGroup91_5 : SchedWriteRes<[HWPort0,HWPort23]> { } def: InstRW<[HWWriteResGroup91_5], (instregex "MMX_PSADBWirm")>; -def HWWriteResGroup92_2 : SchedWriteRes<[HWPort01,HWPort23]> { - let Latency = 10; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup92_2], (instregex "(V?)MULSDrm", - "(V?)MULSSrm")>; - def HWWriteResGroup93 : SchedWriteRes<[HWPort1,HWPort5]> { let Latency = 5; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index fcd5fb93f654..c390475e400f 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -139,13 +139,28 @@ def : WriteRes { let Latency = 6; } def : WriteRes; defm : X86WriteRes; -defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; + +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; + +defm : SBWriteResPair; + +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 408b3ce3690e..82ec0c03cce1 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -152,13 +152,28 @@ def : WriteRes; def : WriteRes; defm : X86WriteRes; -defm : SKLWriteResPair; // Floating point add/sub. -defm : SKLWriteResPair; // Floating point add/sub (YMM/ZMM). -defm : SKLWriteResPair; // Floating point compare. -defm : SKLWriteResPair; // Floating point compare (YMM/ZMM). -defm : SKLWriteResPair; // Floating point compare to flags. -defm : SKLWriteResPair; // Floating point multiplication. -defm : SKLWriteResPair; // Floating point multiplication (YMM/ZMM). +defm : SKLWriteResPair; // Floating point add/sub. +defm : SKLWriteResPair; // Floating point add/sub (XMM). +defm : SKLWriteResPair; // Floating point add/sub (YMM/ZMM). +defm : SKLWriteResPair; // Floating point double add/sub. +defm : SKLWriteResPair; // Floating point double add/sub (XMM). +defm : SKLWriteResPair; // Floating point double add/sub (YMM/ZMM). + +defm : SKLWriteResPair; // Floating point compare. +defm : SKLWriteResPair; // Floating point compare (XMM). +defm : SKLWriteResPair; // Floating point compare (YMM/ZMM). +defm : SKLWriteResPair; // Floating point double compare. +defm : SKLWriteResPair; // Floating point double compare (XMM). +defm : SKLWriteResPair; // Floating point double compare (YMM/ZMM). + +defm : SKLWriteResPair; // Floating point compare to flags. + +defm : SKLWriteResPair; // Floating point multiplication. +defm : SKLWriteResPair; // Floating point multiplication (XMM). +defm : SKLWriteResPair; // Floating point multiplication (YMM/ZMM). +defm : SKLWriteResPair; // Floating point double multiplication. +defm : SKLWriteResPair; // Floating point double multiplication (XMM). +defm : SKLWriteResPair; // Floating point double multiplication (YMM/ZMM). defm : SKLWriteResPair; // Floating point division. //defm : SKLWriteResPair; // Floating point division (XMM). @@ -1473,24 +1488,6 @@ def: InstRW<[SKLWriteResGroup121], (instregex "(V?)PCMPGTQrm", "VPMOVSXWDYrm", "VPMOVZXWDYrm")>; -def SKLWriteResGroup122 : SchedWriteRes<[SKLPort01,SKLPort23]> { - let Latency = 9; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup122], (instregex "(V?)ADDSDrm", - "(V?)ADDSSrm", - "(V?)CMPSDrm", - "(V?)CMPSSrm", - "(V?)MAX(C?)SDrm", - "(V?)MAX(C?)SSrm", - "(V?)MIN(C?)SDrm", - "(V?)MIN(C?)SSrm", - "(V?)MULSDrm", - "(V?)MULSSrm", - "(V?)SUBSDrm", - "(V?)SUBSSrm")>; - def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> { let Latency = 9; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 00a1c2b9eff3..2b76e54f8283 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -152,13 +152,28 @@ def : WriteRes; def : WriteRes; defm : X86WriteRes; -defm : SKXWriteResPair; // Floating point add/sub. -defm : SKXWriteResPair; // Floating point add/sub (YMM/ZMM). -defm : SKXWriteResPair; // Floating point compare. -defm : SKXWriteResPair; // Floating point compare (YMM/ZMM). -defm : SKXWriteResPair; // Floating point compare to flags. -defm : SKXWriteResPair; // Floating point multiplication. -defm : SKXWriteResPair; // Floating point multiplication (YMM/ZMM). +defm : SKXWriteResPair; // Floating point add/sub. +defm : SKXWriteResPair; // Floating point add/sub (XMM). +defm : SKXWriteResPair; // Floating point add/sub (YMM/ZMM). +defm : SKXWriteResPair; // Floating point double add/sub. +defm : SKXWriteResPair; // Floating point double add/sub (XMM). +defm : SKXWriteResPair; // Floating point double add/sub (YMM/ZMM). + +defm : SKXWriteResPair; // Floating point compare. +defm : SKXWriteResPair; // Floating point compare (XMM). +defm : SKXWriteResPair; // Floating point compare (YMM/ZMM). +defm : SKXWriteResPair; // Floating point double compare. +defm : SKXWriteResPair; // Floating point double compare (XMM). +defm : SKXWriteResPair; // Floating point double compare (YMM/ZMM). + +defm : SKXWriteResPair; // Floating point compare to flags. + +defm : SKXWriteResPair; // Floating point multiplication. +defm : SKXWriteResPair; // Floating point multiplication (XMM). +defm : SKXWriteResPair; // Floating point multiplication (YMM/ZMM). +defm : SKXWriteResPair; // Floating point double multiplication. +defm : SKXWriteResPair; // Floating point double multiplication (XMM). +defm : SKXWriteResPair; // Floating point double multiplication (YMM/ZMM). defm : SKXWriteResPair; // 10-14 cycles. // Floating point division. //defm : SKXWriteResPair; // 10-14 cycles. // Floating point division (XMM). @@ -2472,20 +2487,8 @@ def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> { } def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVTPS2PIirm", "MMX_CVTTPS2PIirm", - "(V?)ADDSDrm", - "(V?)ADDSSrm", - "(V?)CMPSDrm", - "(V?)CMPSSrm", "VCVTPH2PSrm", - "(V?)CVTPS2PDrm", - "(V?)MAX(C?)SDrm", - "(V?)MAX(C?)SSrm", - "(V?)MIN(C?)SDrm", - "(V?)MIN(C?)SSrm", - "(V?)MULSDrm", - "(V?)MULSSrm", - "(V?)SUBSDrm", - "(V?)SUBSSrm")>; + "(V?)CVTPS2PDrm")>; def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0,SKXPort015]> { let Latency = 9; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 22c671940737..215847abca70 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -105,24 +105,37 @@ def WriteZero : SchedWrite; defm WriteJump : X86SchedWritePair; // Floating point. This covers both scalar and vector operations. -def WriteFLoad : SchedWrite; -def WriteFStore : SchedWrite; -def WriteFMove : SchedWrite; -defm WriteFAdd : X86SchedWritePair; // Floating point add/sub. -defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM/ZMM). -defm WriteFCmp : X86SchedWritePair; // Floating point compare. -defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM/ZMM). -defm WriteFCom : X86SchedWritePair; // Floating point compare to flags. -defm WriteFMul : X86SchedWritePair; // Floating point multiplication. -defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM/ZMM). +def WriteFLoad : SchedWrite; +def WriteFStore : SchedWrite; +def WriteFMove : SchedWrite; + +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub. +defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM). +defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM/ZMM). +defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub. +defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM). +defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM/ZMM). +defm WriteFCmp : X86SchedWritePair; // Floating point compare. +defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM). +defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM/ZMM). +defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare. +defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM). +defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM/ZMM). +defm WriteFCom : X86SchedWritePair; // Floating point compare to flags. +defm WriteFMul : X86SchedWritePair; // Floating point multiplication. +defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM). +defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM/ZMM). +defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication. +defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM). +defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM/ZMM). defm WriteFDiv : X86SchedWritePair; // Floating point division. defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM). defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM). defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM). -defm WriteFDiv64 : X86SchedWritePair; // Floating point division. -defm WriteFDiv64X : X86SchedWritePair; // Floating point division (XMM). -defm WriteFDiv64Y : X86SchedWritePair; // Floating point division (YMM). -defm WriteFDiv64Z : X86SchedWritePair; // Floating point division (ZMM). +defm WriteFDiv64 : X86SchedWritePair; // Floating point double division. +defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM). +defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM). +defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM). defm WriteFSqrt : X86SchedWritePair; // Floating point square root. defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM). defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM). @@ -267,15 +280,19 @@ def WriteNop : SchedWrite; // Vector width wrappers. def SchedWriteFAdd - : X86SchedWriteWidths; + : X86SchedWriteWidths; +def SchedWriteFAdd64 + : X86SchedWriteWidths; def SchedWriteFHAdd : X86SchedWriteWidths; def SchedWriteFCmp - : X86SchedWriteWidths; + : X86SchedWriteWidths; +def SchedWriteFCmp64 + : X86SchedWriteWidths; def SchedWriteFMul - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFMul64 - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFMA : X86SchedWriteWidths; def SchedWriteDPPD @@ -356,9 +373,9 @@ def SchedWriteVarBlend // Vector size wrappers. def SchedWriteFAddSizes - : X86SchedWriteSizes; + : X86SchedWriteSizes; def SchedWriteFCmpSizes - : X86SchedWriteSizes; + : X86SchedWriteSizes; def SchedWriteFMulSizes : X86SchedWriteSizes; def SchedWriteFDivSizes diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 77c8ae7ae63e..b0aa002ea253 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -204,12 +204,24 @@ def : WriteRes; defm : X86WriteRes; defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; @@ -398,8 +410,7 @@ def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> { let Latency = 5; let ResourceCycles = [5]; } -def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)", - "MUL(PS|SD)(rr|rm)(_Int)?")>; +def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)")>; // Port1 def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> { @@ -518,13 +529,10 @@ def : InstRW<[AtomWrite01_6], (instrs LD_F1, CMPXCHG8rm, INTO, XLAT, SHLD16rri8, SHRD16rri8, SHLD16mrCL, SHRD16mrCL, SHLD16mri8, SHRD16mri8, - ADDSUBPDrr, ADDSUBPDrm, CVTPS2DQrr, CVTTPS2DQrr)>; def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr", "IST_F(P)?(16|32|64)?m", - "MMX_PH(ADD|SUB)S?Wrm", - "(ADD|SUB|MAX|MIN)PDrr", - "CMPPDrri")>; + "MMX_PH(ADD|SUB)S?Wrm")>; def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> { let Latency = 7; @@ -541,8 +549,6 @@ def : InstRW<[AtomWrite01_7], (instrs AAD8i8, MMX_CVTPD2PIirr, MMX_CVTPI2PDirr, MMX_CVTTPD2PIirr)>; -def : InstRW<[AtomWrite01_7], (instregex "(ADD|SUB|MAX|MIN)PDrm", - "CMPPDrmi")>; def AtomWrite01_8 : SchedWriteRes<[AtomPort01]> { let Latency = 8; @@ -571,8 +577,7 @@ def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr, SHLD64mrCL, SHRD64mrCL, SHLD64mri8, SHRD64mri8, SHLD64rri8, SHRD64rri8, - CMPXCHG8rr, - MULPDrr)>; + CMPXCHG8rr)>; def : InstRW<[AtomWrite01_9], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F", "(U)?COM_FI", "TST_F", "(U)?COMIS(D|S)rr", @@ -582,8 +587,7 @@ def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> { let Latency = 10; let ResourceCycles = [10]; } -def : InstRW<[AtomWrite01_10], (instrs FLDL2E, FLDL2T, FLDLG2, FLDLN2, FLDPI, - MULPDrm)>; +def : InstRW<[AtomWrite01_10], (instrs FLDL2E, FLDL2T, FLDLG2, FLDLN2, FLDPI)>; def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm", "CVT(T)?SS2SI64rm(_Int)?")>; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 685ea3b4743f..ae4ee2b47463 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -317,12 +317,24 @@ def : WriteRes; def : WriteRes { let Latency = 2; } defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. @@ -552,32 +564,6 @@ def : InstRW<[JWriteCVTPH2PSYLd], (instrs VCVTPH2PSYrm)>; // AVX instructions. //////////////////////////////////////////////////////////////////////////////// -def JWriteVMULYPD: SchedWriteRes<[JFPU1, JFPM]> { - let Latency = 4; - let ResourceCycles = [2, 4]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteVMULYPD], (instrs VMULPDYrr)>; - -def JWriteVMULYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { - let Latency = 9; - let ResourceCycles = [2, 2, 4]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteVMULYPDLd, ReadAfterLd], (instrs VMULPDYrm)>; - -def JWriteVMULPD: SchedWriteRes<[JFPU1, JFPM]> { - let Latency = 4; - let ResourceCycles = [1, 2]; -} -def : InstRW<[JWriteVMULPD], (instrs MULPDrr, MULSDrr, VMULPDrr, VMULSDrr)>; - -def JWriteVMULPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { - let Latency = 9; - let ResourceCycles = [1, 1, 2]; -} -def : InstRW<[JWriteVMULPDLd], (instrs MULPDrm, MULSDrm, VMULPDrm, VMULSDrm)>; - def JWriteVCVTY: SchedWriteRes<[JFPU1, JSTC]> { let Latency = 3; let ResourceCycles = [2, 2]; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 93de36b60e66..0f6b5385f29c 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -130,13 +130,25 @@ def : WriteRes { let Latency = 3; } def : WriteRes; defm : X86WriteRes; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 9f0c637d0e69..41767444abfb 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -193,9 +193,17 @@ def : WriteRes; def : WriteRes { let Latency = 8; } defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -224,7 +232,11 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; diff --git a/llvm/test/CodeGen/X86/sse3-schedule.ll b/llvm/test/CodeGen/X86/sse3-schedule.ll index 8c2424ec405e..79271c9c94bd 100644 --- a/llvm/test/CodeGen/X86/sse3-schedule.ll +++ b/llvm/test/CodeGen/X86/sse3-schedule.ll @@ -29,7 +29,7 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; ATOM-LABEL: test_addsubpd: ; ATOM: # %bb.0: ; ATOM-NEXT: addsubpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: addsubpd (%rdi), %xmm0 # sched: [6:3.00] +; ATOM-NEXT: addsubpd (%rdi), %xmm0 # sched: [7:3.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_addsubpd: diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s index b110162992a6..2d03f1057f2f 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s @@ -407,7 +407,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 5 5.00 addsd %xmm0, %xmm2 # CHECK-NEXT: 1 5 5.00 * addsd (%rax), %xmm2 # CHECK-NEXT: 1 6 3.00 addsubpd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 3.00 * addsubpd (%rax), %xmm2 +# CHECK-NEXT: 1 7 3.50 * addsubpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 andnpd %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 andpd %xmm0, %xmm2 @@ -673,7 +673,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] -# CHECK-NEXT: 872.50 644.50 +# CHECK-NEXT: 873.00 645.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] Instructions: @@ -682,7 +682,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 5.00 - addsd %xmm0, %xmm2 # CHECK-NEXT: 5.00 - addsd (%rax), %xmm2 # CHECK-NEXT: 3.00 3.00 addsubpd %xmm0, %xmm2 -# CHECK-NEXT: 3.00 3.00 addsubpd (%rax), %xmm2 +# CHECK-NEXT: 3.50 3.50 addsubpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 andnpd %xmm0, %xmm2 # CHECK-NEXT: 1.00 - andnpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 andpd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse3.s index b604d10c0a2c..f70b1716e220 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse3.s @@ -40,7 +40,7 @@ movsldup (%rax), %xmm2 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 6 3.00 addsubpd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 3.00 * addsubpd (%rax), %xmm2 +# CHECK-NEXT: 1 7 3.50 * addsubpd (%rax), %xmm2 # CHECK-NEXT: 1 5 5.00 addsubps %xmm0, %xmm2 # CHECK-NEXT: 1 5 5.00 * addsubps (%rax), %xmm2 # CHECK-NEXT: 1 8 4.00 haddpd %xmm0, %xmm2 @@ -65,12 +65,12 @@ movsldup (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] -# CHECK-NEXT: 57.50 41.50 +# CHECK-NEXT: 58.00 42.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] Instructions: # CHECK-NEXT: 3.00 3.00 addsubpd %xmm0, %xmm2 -# CHECK-NEXT: 3.00 3.00 addsubpd (%rax), %xmm2 +# CHECK-NEXT: 3.50 3.50 addsubpd (%rax), %xmm2 # CHECK-NEXT: 5.00 - addsubps %xmm0, %xmm2 # CHECK-NEXT: 5.00 - addsubps (%rax), %xmm2 # CHECK-NEXT: 4.00 4.00 haddpd %xmm0, %xmm2