diff --git a/llvm/lib/Target/X86/X86Instr3DNow.td b/llvm/lib/Target/X86/X86Instr3DNow.td index 0c834260a5b2..c64c6be3cf01 100644 --- a/llvm/lib/Target/X86/X86Instr3DNow.td +++ b/llvm/lib/Target/X86/X86Instr3DNow.td @@ -74,8 +74,7 @@ defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", WriteFAdd, 1>; defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2F>; defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>; -// FIXME: Is there a better scheduler class for EMMS/FEMMS? -let SchedRW = [WriteMicrocoded] in +let SchedRW = [WriteEMMS] in def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>, TB; diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index c410ddb4c60f..d41641ad158a 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -153,8 +153,7 @@ multiclass sse12_cvt_pint_3addr opc, RegisterClass SrcRC, // MMX EMMS Instruction //===----------------------------------------------------------------------===// -// FIXME: Is there a better scheduler class for EMMS/FEMMS? -let SchedRW = [WriteMicrocoded] in +let SchedRW = [WriteEMMS] in def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 21d0c8a629d1..edd81bed65fe 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -205,6 +205,7 @@ def : WriteRes { def : WriteRes { let Latency = 5; } def : WriteRes; def : WriteRes; +defm : X86WriteRes; defm : BWWriteResPair; // Vector integer ALU op, no logicals. defm : BWWriteResPair; // Vector integer ALU op, no logicals (YMM/ZMM). @@ -1779,13 +1780,6 @@ def BWWriteResGroup186 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPor } def: InstRW<[BWWriteResGroup186], (instregex "^XSAVE$", "XSAVEC", "XSAVES", "XSAVEOPT")>; -def BWWriteResGroup187 : SchedWriteRes<[BWPort01,BWPort15,BWPort015,BWPort0156]> { - let Latency = 31; - let NumMicroOps = 31; - let ResourceCycles = [8,1,21,1]; -} -def: InstRW<[BWWriteResGroup187], (instregex "MMX_EMMS")>; - def BWWriteResGroup189 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> { let Latency = 29; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index cacf24f1e34f..79a9e7a847e0 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -147,6 +147,7 @@ defm : HWWriteResPair; def : WriteRes; def : WriteRes { let Latency = 5; } def : WriteRes; +defm : X86WriteRes; defm : HWWriteResPair; defm : HWWriteResPair; @@ -2105,13 +2106,6 @@ def HWWriteResGroup171 : SchedWriteRes<[HWPort5,HWPort6,HWPort23,HWPort237,HWPor def: InstRW<[HWWriteResGroup171], (instregex "OUT(8|16|32)ir", "OUT(8|16|32)rr")>; -def HWWriteResGroup172 : SchedWriteRes<[HWPort01,HWPort15,HWPort015,HWPort0156]> { - let Latency = 31; - let NumMicroOps = 31; - let ResourceCycles = [8,1,21,1]; -} -def: InstRW<[HWWriteResGroup172], (instregex "MMX_EMMS")>; - def HWWriteResGroup173 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> { let Latency = 35; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 9c424c06536e..a39e5b2bf28e 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -137,6 +137,7 @@ defm : SBWriteResPair; def : WriteRes; def : WriteRes { let Latency = 6; } def : WriteRes; +defm : X86WriteRes; defm : SBWriteResPair; defm : SBWriteResPair; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 1e34b8409153..8cb6b14239da 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -150,6 +150,7 @@ defm : SKLWriteResPair; def : WriteRes { let Latency = 6; } def : WriteRes; def : WriteRes; +defm : X86WriteRes; defm : SKLWriteResPair; // Floating point add/sub. defm : SKLWriteResPair; // Floating point add/sub (YMM/ZMM). @@ -1600,13 +1601,6 @@ def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237, } def: InstRW<[SKLWriteResGroup143], (instregex "XCHG(8|16|32|64)rm")>; -def SKLWriteResGroup144 : SchedWriteRes<[SKLPort05,SKLPort0156]> { - let Latency = 10; - let NumMicroOps = 10; - let ResourceCycles = [9,1]; -} -def: InstRW<[SKLWriteResGroup144], (instregex "MMX_EMMS")>; - def SKLWriteResGroup145 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { let Latency = 11; let NumMicroOps = 1; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index c22c864425f5..c945fae34d5c 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -150,6 +150,7 @@ defm : SKXWriteResPair; def : WriteRes { let Latency = 5; } def : WriteRes; def : WriteRes; +defm : X86WriteRes; defm : SKXWriteResPair; // Floating point add/sub. defm : SKXWriteResPair; // Floating point add/sub (YMM/ZMM). @@ -2693,13 +2694,6 @@ def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237, } def: InstRW<[SKXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>; -def SKXWriteResGroup158 : SchedWriteRes<[SKXPort05,SKXPort0156]> { - let Latency = 10; - let NumMicroOps = 10; - let ResourceCycles = [9,1]; -} -def: InstRW<[SKXWriteResGroup158], (instregex "MMX_EMMS")>; - def SKXWriteResGroup159 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { let Latency = 11; let NumMicroOps = 1; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index e44aeaf0e17d..768e5e15d6a2 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -216,6 +216,9 @@ defm WriteAESKeyGen : X86SchedWritePair; // Key Generation. // Carry-less multiplication instructions. defm WriteCLMul : X86SchedWritePair; +// EMMS/FEMMS +def WriteEMMS : SchedWrite; + // Load/store MXCSR def WriteLDMXCSR : SchedWrite; def WriteSTMXCSR : SchedWrite; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 4c64657c70a2..1b3337180424 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -201,6 +201,7 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; +defm : X86WriteRes; defm : AtomWriteResPair; defm : AtomWriteResPair; @@ -490,8 +491,7 @@ def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> { let Latency = 5; let ResourceCycles = [5]; } -def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, ST_FP80m, - MMX_EMMS)>; +def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, ST_FP80m)>; def : InstRW<[AtomWrite01_5], (instregex "MMX_PH(ADD|SUB)S?Wrr")>; def AtomWrite01_6 : SchedWriteRes<[AtomPort01]> { diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index d9dccf408290..8521ed3881d4 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -314,6 +314,7 @@ def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 5; } def : WriteRes; def : WriteRes; +def : WriteRes { let Latency = 2; } defm : JWriteResFpuPair; defm : JWriteResYMMPair; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 08b3f36728e0..dec522ea97d9 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -128,6 +128,7 @@ defm : SLMWriteResPair def : WriteRes; def : WriteRes { let Latency = 3; } def : WriteRes; +defm : X86WriteRes; defm : SLMWriteResPair; defm : SLMWriteResPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 3e20cb2c75d8..cf95ac1fa0b0 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -234,6 +234,7 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes { let Latency = 8; } +def : WriteRes { let Latency = 2; } defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; diff --git a/llvm/test/CodeGen/X86/3dnow-schedule.ll b/llvm/test/CodeGen/X86/3dnow-schedule.ll index 2f104382ae0c..6de1626795fa 100644 --- a/llvm/test/CodeGen/X86/3dnow-schedule.ll +++ b/llvm/test/CodeGen/X86/3dnow-schedule.ll @@ -4,7 +4,7 @@ define void @test_femms() optsize { ; CHECK-LABEL: test_femms: ; CHECK: # %bb.0: -; CHECK-NEXT: femms # sched: [100:0.33] +; CHECK-NEXT: femms # sched: [31:10.33] ; CHECK-NEXT: retq # sched: [1:1.00] call void @llvm.x86.mmx.femms() ret void diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll index b57a20d87178..af81bf33d1ba 100644 --- a/llvm/test/CodeGen/X86/mmx-schedule.ll +++ b/llvm/test/CodeGen/X86/mmx-schedule.ll @@ -526,7 +526,7 @@ declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone define void @test_emms() optsize { ; GENERIC-LABEL: test_emms: ; GENERIC: # %bb.0: -; GENERIC-NEXT: emms # sched: [100:0.33] +; GENERIC-NEXT: emms # sched: [31:10.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_emms: @@ -536,12 +536,12 @@ define void @test_emms() optsize { ; ; SLM-LABEL: test_emms: ; SLM: # %bb.0: -; SLM-NEXT: emms # sched: [100:1.00] +; SLM-NEXT: emms # sched: [10:5.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_emms: ; SANDY: # %bb.0: -; SANDY-NEXT: emms # sched: [100:0.33] +; SANDY-NEXT: emms # sched: [31:10.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_emms: @@ -566,12 +566,12 @@ define void @test_emms() optsize { ; ; BTVER2-LABEL: test_emms: ; BTVER2: # %bb.0: -; BTVER2-NEXT: emms # sched: [100:0.50] +; BTVER2-NEXT: emms # sched: [2:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_emms: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: emms # sched: [100:?] +; ZNVER1-NEXT: emms # sched: [2:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] call void @llvm.x86.mmx.emms() ret void diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s index bb2e30c9d705..8ee3e12498ce 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s @@ -164,7 +164,7 @@ pxor (%rax), %mm2 # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 100 0.50 * * * emms +# CHECK-NEXT: 1 2 0.50 * * * emms # CHECK-NEXT: 1 1 0.50 movd %eax, %mm2 # CHECK-NEXT: 1 5 1.00 * movd (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 movd %mm0, %ecx @@ -288,11 +288,11 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 2.50 2.50 - - - 51.00 45.00 46.00 - 2.00 - 45.00 45.00 6.00 +# CHECK-NEXT: 2.00 2.00 - 0.50 0.50 51.50 45.50 46.00 - 2.00 - 45.00 45.00 6.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - emms +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - emms # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movd %eax, %mm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - - - movd (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movd %mm0, %ecx diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-mmx.s index 1f59d4e5ca7c..53590138a53b 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-mmx.s @@ -164,7 +164,7 @@ pxor (%rax), %mm2 # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 100 1.00 * * * emms +# CHECK-NEXT: 9 10 5.00 * * * emms # CHECK-NEXT: 1 1 0.50 movd %eax, %mm2 # CHECK-NEXT: 1 3 1.00 * movd (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 movd %mm0, %ecx @@ -282,11 +282,11 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - - - 73.00 24.00 3.00 3.00 48.00 +# CHECK-NEXT: - - - 77.00 29.00 3.00 3.00 48.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: -# CHECK-NEXT: - - - 1.00 - - - - emms +# CHECK-NEXT: - - - 5.00 5.00 - - - emms # CHECK-NEXT: - - - - - 0.50 0.50 - movd %eax, %mm2 # CHECK-NEXT: - - - - - - - 1.00 movd (%rax), %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 - movd %mm0, %ecx diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s index 99668b02b1b8..ec66e241465d 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s @@ -164,7 +164,7 @@ pxor (%rax), %mm2 # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 100 0.33 * * * emms +# CHECK-NEXT: 31 31 10.33 * * * emms # CHECK-NEXT: 1 1 0.33 movd %eax, %mm2 # CHECK-NEXT: 1 5 0.50 * movd (%rax), %mm2 # CHECK-NEXT: 1 1 0.33 movd %mm0, %ecx @@ -282,11 +282,11 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 10.33 44.33 2.00 46.33 24.00 24.00 +# CHECK-NEXT: - - 20.33 54.33 2.00 56.33 24.00 24.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - emms +# CHECK-NEXT: - - 10.33 10.33 - 10.33 - - emms # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movd %eax, %mm2 # CHECK-NEXT: - - - - - - 0.50 0.50 movd (%rax), %mm2 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movd %mm0, %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-mmx.s index cb6852738e5a..0bab31797725 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-mmx.s @@ -164,7 +164,7 @@ pxor (%rax), %mm2 # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 100 - * * * emms +# CHECK-NEXT: 1 2 0.25 * * * emms # CHECK-NEXT: 1 3 1.00 movd %eax, %mm2 # CHECK-NEXT: 1 8 0.50 * movd (%rax), %mm2 # CHECK-NEXT: 1 2 1.00 movd %mm0, %ecx @@ -286,11 +286,11 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 24.00 24.00 - - - - - 27.00 24.00 28.00 21.00 - +# CHECK-NEXT: 24.00 24.00 - - - - - 27.25 24.25 28.25 21.25 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: -# CHECK-NEXT: - - - - - - - - - - - - emms +# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - emms # CHECK-NEXT: - - - - - - - - - 1.00 - - movd %eax, %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movd (%rax), %mm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - movd %mm0, %ecx