[X86] Add WriteEMMS scheduler class

Filled in the missing EMMS/FEMMS scheduling values from the Btver2 Software Optimization Guide (SoG) or Agner Fog's instruction tables.

llvm-svn: 331546
This commit is contained in:
Simon Pilgrim 2018-05-04 18:16:13 +00:00
parent 4f9ead2356
commit 0e51a125ea
18 changed files with 33 additions and 52 deletions

View File

@ -74,8 +74,7 @@ defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", WriteFAdd, 1>;
defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2F>;
defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;
// FIXME: Is there a better scheduler class for EMMS/FEMMS?
let SchedRW = [WriteMicrocoded] in
let SchedRW = [WriteEMMS] in
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
[(int_x86_mmx_femms)]>, TB;

View File

@ -153,8 +153,7 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
// MMX EMMS Instruction
//===----------------------------------------------------------------------===//
// FIXME: Is there a better scheduler class for EMMS/FEMMS?
let SchedRW = [WriteMicrocoded] in
let SchedRW = [WriteEMMS] in
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
//===----------------------------------------------------------------------===//

View File

@ -205,6 +205,7 @@ def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> {
def : WriteRes<WriteVecLoad, [BWPort23]> { let Latency = 5; }
def : WriteRes<WriteVecStore, [BWPort237, BWPort4]>;
def : WriteRes<WriteVecMove, [BWPort015]>;
// EMMS/FEMMS: latency 31, resource cycles [8,1,21,1], 31 uops.
defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
@ -1779,13 +1780,6 @@ def BWWriteResGroup186 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPor
}
def: InstRW<[BWWriteResGroup186], (instregex "^XSAVE$", "XSAVEC", "XSAVES", "XSAVEOPT")>;
def BWWriteResGroup187 : SchedWriteRes<[BWPort01,BWPort15,BWPort015,BWPort0156]> {
let Latency = 31;
let NumMicroOps = 31;
let ResourceCycles = [8,1,21,1];
}
def: InstRW<[BWWriteResGroup187], (instregex "MMX_EMMS")>;
def BWWriteResGroup189 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> {
let Latency = 29;
let NumMicroOps = 3;

View File

@ -147,6 +147,7 @@ defm : HWWriteResPair<WriteIDiv, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
def : WriteRes<WriteFStore, [HWPort237, HWPort4]>;
def : WriteRes<WriteFLoad, [HWPort23]> { let Latency = 5; }
def : WriteRes<WriteFMove, [HWPort5]>;
// EMMS/FEMMS: latency 31, resource cycles [8,1,21,1], 31 uops.
defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], 31, [8,1,21,1], 31>;
defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteFAddY, [HWPort1], 3, [1], 1, 7>;
@ -2105,13 +2106,6 @@ def HWWriteResGroup171 : SchedWriteRes<[HWPort5,HWPort6,HWPort23,HWPort237,HWPor
def: InstRW<[HWWriteResGroup171], (instregex "OUT(8|16|32)ir",
"OUT(8|16|32)rr")>;
def HWWriteResGroup172 : SchedWriteRes<[HWPort01,HWPort15,HWPort015,HWPort0156]> {
let Latency = 31;
let NumMicroOps = 31;
let ResourceCycles = [8,1,21,1];
}
def: InstRW<[HWWriteResGroup172], (instregex "MMX_EMMS")>;
def HWWriteResGroup173 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> {
let Latency = 35;
let NumMicroOps = 3;

View File

@ -137,6 +137,7 @@ defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>;
def : WriteRes<WriteFStore, [SBPort23, SBPort4]>;
def : WriteRes<WriteFLoad, [SBPort23]> { let Latency = 6; }
def : WriteRes<WriteFMove, [SBPort5]>;
// EMMS/FEMMS: latency 31, 31 resource cycles on SBPort015, 31 uops.
defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;
defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAddY, [SBPort1], 3, [1], 1, 7>;

View File

@ -150,6 +150,7 @@ defm : SKLWriteResPair<WriteJump, [SKLPort06], 1>;
def : WriteRes<WriteFLoad, [SKLPort23]> { let Latency = 6; }
def : WriteRes<WriteFStore, [SKLPort237, SKLPort4]>;
def : WriteRes<WriteFMove, [SKLPort015]>;
// EMMS/FEMMS: latency 10, resource cycles [9,1], 10 uops.
defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>;
defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 6>; // Floating point add/sub.
defm : SKLWriteResPair<WriteFAddY, [SKLPort01], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
@ -1600,13 +1601,6 @@ def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,
}
def: InstRW<[SKLWriteResGroup143], (instregex "XCHG(8|16|32|64)rm")>;
def SKLWriteResGroup144 : SchedWriteRes<[SKLPort05,SKLPort0156]> {
let Latency = 10;
let NumMicroOps = 10;
let ResourceCycles = [9,1];
}
def: InstRW<[SKLWriteResGroup144], (instregex "MMX_EMMS")>;
def SKLWriteResGroup145 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
let Latency = 11;
let NumMicroOps = 1;

View File

@ -150,6 +150,7 @@ defm : SKXWriteResPair<WriteJump, [SKXPort06], 1>;
def : WriteRes<WriteFLoad, [SKXPort23]> { let Latency = 5; }
def : WriteRes<WriteFStore, [SKXPort237, SKXPort4]>;
def : WriteRes<WriteFMove, [SKXPort015]>;
// EMMS/FEMMS: latency 10, resource cycles [9,1], 10 uops.
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
defm : SKXWriteResPair<WriteFAdd, [SKXPort015], 4, [1], 1, 6>; // Floating point add/sub.
defm : SKXWriteResPair<WriteFAddY,[SKXPort015], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
@ -2693,13 +2694,6 @@ def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,
}
def: InstRW<[SKXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>;
def SKXWriteResGroup158 : SchedWriteRes<[SKXPort05,SKXPort0156]> {
let Latency = 10;
let NumMicroOps = 10;
let ResourceCycles = [9,1];
}
def: InstRW<[SKXWriteResGroup158], (instregex "MMX_EMMS")>;
def SKXWriteResGroup159 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
let Latency = 11;
let NumMicroOps = 1;

View File

@ -216,6 +216,9 @@ defm WriteAESKeyGen : X86SchedWritePair; // Key Generation.
// Carry-less multiplication instructions.
defm WriteCLMul : X86SchedWritePair;
// EMMS/FEMMS: dedicated scheduler write class for the MMX EMMS and 3DNow!
// FEMMS instructions, so each scheduler model can describe them separately.
def WriteEMMS : SchedWrite;
// Load/store MXCSR
def WriteLDMXCSR : SchedWrite;
def WriteSTMXCSR : SchedWrite;

View File

@ -201,6 +201,7 @@ def : WriteRes<WriteNop, [AtomPort01]>;
def : WriteRes<WriteFLoad, [AtomPort0]>;
def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFMove, [AtomPort01]>;
// EMMS/FEMMS: latency 5, 5 resource cycles on AtomPort01, 1 uop.
defm : X86WriteRes<WriteEMMS, [AtomPort01], 5, [5], 1>;
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
@ -490,8 +491,7 @@ def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, ST_FP80m,
MMX_EMMS)>;
def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, ST_FP80m)>;
def : InstRW<[AtomWrite01_5], (instregex "MMX_PH(ADD|SUB)S?Wrr")>;
def AtomWrite01_6 : SchedWriteRes<[AtomPort01]> {

View File

@ -314,6 +314,7 @@ def : WriteRes<WriteNop, [JALU01]> { let Latency = 1; }
def : WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX]> { let Latency = 5; }
def : WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC]>;
def : WriteRes<WriteFMove, [JFPU01, JFPX]>;
// EMMS/FEMMS: issued to JFPU01 and JFPX with a 2-cycle latency.
def : WriteRes<WriteEMMS, [JFPU01, JFPX]> { let Latency = 2; }
defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAddY, [JFPU0, JFPA], 3, [2,2], 2>;

View File

@ -128,6 +128,7 @@ defm : SLMWriteResPair<WriteIDiv, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>
def : WriteRes<WriteFStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
// EMMS/FEMMS: latency 10, 10 resource cycles on SLM_FPC_RSV01, 9 uops.
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;

View File

@ -234,6 +234,7 @@ def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
def : WriteRes<WriteVecStore, [ZnAGU]>;
def : WriteRes<WriteVecMove, [ZnFPU]>;
def : WriteRes<WriteVecLoad, [ZnAGU]> { let Latency = 8; }
// EMMS/FEMMS: issued to ZnFPU with a 2-cycle latency.
def : WriteRes<WriteEMMS, [ZnFPU]> { let Latency = 2; }
defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>;

View File

@ -4,7 +4,7 @@
define void @test_femms() optsize {
; CHECK-LABEL: test_femms:
; CHECK: # %bb.0:
; CHECK-NEXT: femms # sched: [100:0.33]
; CHECK-NEXT: femms # sched: [31:10.33]
; CHECK-NEXT: retq # sched: [1:1.00]
call void @llvm.x86.mmx.femms()
ret void

View File

@ -526,7 +526,7 @@ declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone
define void @test_emms() optsize {
; GENERIC-LABEL: test_emms:
; GENERIC: # %bb.0:
; GENERIC-NEXT: emms # sched: [100:0.33]
; GENERIC-NEXT: emms # sched: [31:10.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_emms:
@ -536,12 +536,12 @@ define void @test_emms() optsize {
;
; SLM-LABEL: test_emms:
; SLM: # %bb.0:
; SLM-NEXT: emms # sched: [100:1.00]
; SLM-NEXT: emms # sched: [10:5.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_emms:
; SANDY: # %bb.0:
; SANDY-NEXT: emms # sched: [100:0.33]
; SANDY-NEXT: emms # sched: [31:10.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_emms:
@ -566,12 +566,12 @@ define void @test_emms() optsize {
;
; BTVER2-LABEL: test_emms:
; BTVER2: # %bb.0:
; BTVER2-NEXT: emms # sched: [100:0.50]
; BTVER2-NEXT: emms # sched: [2:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_emms:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: emms # sched: [100:?]
; ZNVER1-NEXT: emms # sched: [2:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
call void @llvm.x86.mmx.emms()
ret void

View File

@ -164,7 +164,7 @@ pxor (%rax), %mm2
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 100 0.50 * * * emms
# CHECK-NEXT: 1 2 0.50 * * * emms
# CHECK-NEXT: 1 1 0.50 movd %eax, %mm2
# CHECK-NEXT: 1 5 1.00 * movd (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 movd %mm0, %ecx
@ -288,11 +288,11 @@ pxor (%rax), %mm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: 2.50 2.50 - - - 51.00 45.00 46.00 - 2.00 - 45.00 45.00 6.00
# CHECK-NEXT: 2.00 2.00 - 0.50 0.50 51.50 45.50 46.00 - 2.00 - 45.00 45.00 6.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - emms
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - emms
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movd %eax, %mm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movd (%rax), %mm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movd %mm0, %ecx

View File

@ -164,7 +164,7 @@ pxor (%rax), %mm2
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 100 1.00 * * * emms
# CHECK-NEXT: 9 10 5.00 * * * emms
# CHECK-NEXT: 1 1 0.50 movd %eax, %mm2
# CHECK-NEXT: 1 3 1.00 * movd (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 movd %mm0, %ecx
@ -282,11 +282,11 @@ pxor (%rax), %mm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
# CHECK-NEXT: - - - 73.00 24.00 3.00 3.00 48.00
# CHECK-NEXT: - - - 77.00 29.00 3.00 3.00 48.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - emms
# CHECK-NEXT: - - - 5.00 5.00 - - - emms
# CHECK-NEXT: - - - - - 0.50 0.50 - movd %eax, %mm2
# CHECK-NEXT: - - - - - - - 1.00 movd (%rax), %mm2
# CHECK-NEXT: - - - - - 0.50 0.50 - movd %mm0, %ecx

View File

@ -164,7 +164,7 @@ pxor (%rax), %mm2
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 100 0.33 * * * emms
# CHECK-NEXT: 31 31 10.33 * * * emms
# CHECK-NEXT: 1 1 0.33 movd %eax, %mm2
# CHECK-NEXT: 1 5 0.50 * movd (%rax), %mm2
# CHECK-NEXT: 1 1 0.33 movd %mm0, %ecx
@ -282,11 +282,11 @@ pxor (%rax), %mm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 10.33 44.33 2.00 46.33 24.00 24.00
# CHECK-NEXT: - - 20.33 54.33 2.00 56.33 24.00 24.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - emms
# CHECK-NEXT: - - 10.33 10.33 - 10.33 - - emms
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movd %eax, %mm2
# CHECK-NEXT: - - - - - - 0.50 0.50 movd (%rax), %mm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movd %mm0, %ecx

View File

@ -164,7 +164,7 @@ pxor (%rax), %mm2
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 100 - * * * emms
# CHECK-NEXT: 1 2 0.25 * * * emms
# CHECK-NEXT: 1 3 1.00 movd %eax, %mm2
# CHECK-NEXT: 1 8 0.50 * movd (%rax), %mm2
# CHECK-NEXT: 1 2 1.00 movd %mm0, %ecx
@ -286,11 +286,11 @@ pxor (%rax), %mm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: 24.00 24.00 - - - - - 27.00 24.00 28.00 21.00 -
# CHECK-NEXT: 24.00 24.00 - - - - - 27.25 24.25 28.25 21.25 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - - - - - - - - - - - - emms
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - emms
# CHECK-NEXT: - - - - - - - - - 1.00 - - movd %eax, %mm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movd (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - movd %mm0, %ecx