forked from OSchip/llvm-project
[X86] Split WriteFRcp/WriteFRsqrt/WriteFSqrt into XMM and YMM/ZMM scheduler classes
llvm-svn: 331290
This commit is contained in:
parent
fa862c45bc
commit
c708868cb1
|
@ -7863,14 +7863,18 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
}
|
||||
}
|
||||
|
||||
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, WriteFRcp, f32x_info>,
|
||||
EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
|
||||
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, WriteFRcp, f64x_info>,
|
||||
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
|
||||
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, WriteFRsqrt, f32x_info>,
|
||||
EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
|
||||
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, WriteFRsqrt, f64x_info>,
|
||||
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
|
||||
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
|
||||
f32x_info>, EVEX_CD8<32, CD8VT1>,
|
||||
T8PD, NotMemoryFoldable;
|
||||
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
|
||||
f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
|
||||
T8PD, NotMemoryFoldable;
|
||||
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
|
||||
SchedWriteFRsqrt.Scl, f32x_info>,
|
||||
EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
|
||||
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
|
||||
SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
|
||||
EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
|
||||
|
||||
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
|
||||
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
|
@ -7895,31 +7899,31 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
}
|
||||
|
||||
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86FoldableSchedWrite sched> {
|
||||
defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched,
|
||||
X86SchedWriteWidths sched> {
|
||||
defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
|
||||
v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched,
|
||||
defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
|
||||
v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
// Define only if AVX512VL feature is present.
|
||||
let Predicates = [HasVLX] in {
|
||||
defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
|
||||
OpNode, sched, v4f32x_info>,
|
||||
OpNode, sched.XMM, v4f32x_info>,
|
||||
EVEX_V128, EVEX_CD8<32, CD8VF>;
|
||||
defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
|
||||
OpNode, sched, v8f32x_info>,
|
||||
OpNode, sched.YMM, v8f32x_info>,
|
||||
EVEX_V256, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
|
||||
OpNode, sched, v2f64x_info>,
|
||||
OpNode, sched.XMM, v2f64x_info>,
|
||||
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
|
||||
OpNode, sched, v4f64x_info>,
|
||||
OpNode, sched.YMM, v4f64x_info>,
|
||||
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
||||
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, WriteFRsqrt>;
|
||||
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, WriteFRcp>;
|
||||
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
|
||||
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
|
||||
|
||||
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
|
||||
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
||||
|
@ -8065,32 +8069,34 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
|
|||
}
|
||||
}
|
||||
|
||||
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr> {
|
||||
defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), WriteFSqrt, v16f32_info>,
|
||||
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
|
||||
X86SchedWriteWidths sched> {
|
||||
defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), sched.ZMM, v16f32_info>,
|
||||
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), WriteFSqrt, v8f64_info>,
|
||||
defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), sched.ZMM, v8f64_info>,
|
||||
EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
|
||||
// Define only if AVX512VL feature is present.
|
||||
let Predicates = [HasVLX] in {
|
||||
defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
|
||||
WriteFSqrt, v4f32x_info>,
|
||||
sched.XMM, v4f32x_info>,
|
||||
EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
|
||||
defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
|
||||
WriteFSqrt, v8f32x_info>,
|
||||
sched.YMM, v8f32x_info>,
|
||||
EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
|
||||
WriteFSqrt, v2f64x_info>,
|
||||
sched.XMM, v2f64x_info>,
|
||||
EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
|
||||
defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
|
||||
WriteFSqrt, v4f64x_info>,
|
||||
sched.YMM, v4f64x_info>,
|
||||
EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr> {
|
||||
defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), WriteFSqrt,
|
||||
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
|
||||
X86SchedWriteWidths sched> {
|
||||
defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), sched.ZMM,
|
||||
v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), WriteFSqrt,
|
||||
defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), sched.ZMM,
|
||||
v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
|
||||
|
@ -8153,20 +8159,21 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
|
|||
}
|
||||
}
|
||||
|
||||
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
|
||||
defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", WriteFSqrt, f32x_info, "SS",
|
||||
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
|
||||
X86SchedWriteWidths sched> {
|
||||
defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.Scl, f32x_info, "SS",
|
||||
int_x86_sse_sqrt_ss>,
|
||||
EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
|
||||
defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", WriteFSqrt, f64x_info, "SD",
|
||||
defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.Scl, f64x_info, "SD",
|
||||
int_x86_sse2_sqrt_sd>,
|
||||
EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
|
||||
NotMemoryFoldable;
|
||||
}
|
||||
|
||||
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt">,
|
||||
avx512_sqrt_packed_all_round<0x51, "vsqrt">;
|
||||
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrt>,
|
||||
avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrt>;
|
||||
|
||||
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
|
||||
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrt>, VEX_LIG;
|
||||
|
||||
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
|
||||
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
|
||||
|
|
|
@ -2836,114 +2836,114 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
|||
|
||||
/// sse1_fp_unop_p - SSE1 unops in packed form.
|
||||
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86FoldableSchedWrite sched, list<Predicate> prds> {
|
||||
X86SchedWriteWidths sched, list<Predicate> prds> {
|
||||
let Predicates = prds in {
|
||||
def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
!strconcat("v", OpcodeStr,
|
||||
"ps\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
|
||||
VEX, Sched<[sched]>, VEX_WIG;
|
||||
VEX, Sched<[sched.XMM]>, VEX_WIG;
|
||||
def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
!strconcat("v", OpcodeStr,
|
||||
"ps\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>,
|
||||
VEX, Sched<[sched.Folded]>, VEX_WIG;
|
||||
VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
|
||||
def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
|
||||
!strconcat("v", OpcodeStr,
|
||||
"ps\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>,
|
||||
VEX, VEX_L, Sched<[sched]>, VEX_WIG;
|
||||
VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
|
||||
def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
|
||||
!strconcat("v", OpcodeStr,
|
||||
"ps\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))]>,
|
||||
VEX, VEX_L, Sched<[sched.Folded]>, VEX_WIG;
|
||||
VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
|
||||
}
|
||||
|
||||
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
|
||||
Sched<[sched]>;
|
||||
Sched<[sched.XMM]>;
|
||||
def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>,
|
||||
Sched<[sched.Folded]>;
|
||||
Sched<[sched.XMM.Folded]>;
|
||||
}
|
||||
|
||||
/// sse2_fp_unop_p - SSE2 unops in vector forms.
|
||||
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, X86FoldableSchedWrite sched> {
|
||||
SDNode OpNode, X86SchedWriteWidths sched> {
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
!strconcat("v", OpcodeStr,
|
||||
"pd\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
|
||||
VEX, Sched<[sched]>, VEX_WIG;
|
||||
VEX, Sched<[sched.XMM]>, VEX_WIG;
|
||||
def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
!strconcat("v", OpcodeStr,
|
||||
"pd\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>,
|
||||
VEX, Sched<[sched.Folded]>, VEX_WIG;
|
||||
VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
|
||||
def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
|
||||
!strconcat("v", OpcodeStr,
|
||||
"pd\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>,
|
||||
VEX, VEX_L, Sched<[sched]>, VEX_WIG;
|
||||
VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
|
||||
def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
|
||||
!strconcat("v", OpcodeStr,
|
||||
"pd\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))]>,
|
||||
VEX, VEX_L, Sched<[sched.Folded]>, VEX_WIG;
|
||||
VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
|
||||
}
|
||||
|
||||
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
|
||||
Sched<[sched]>;
|
||||
Sched<[sched.XMM]>;
|
||||
def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>,
|
||||
Sched<[sched.Folded]>;
|
||||
Sched<[sched.XMM.Folded]>;
|
||||
}
|
||||
|
||||
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86FoldableSchedWrite sched, Predicate AVXTarget> {
|
||||
X86SchedWriteWidths sched, Predicate AVXTarget> {
|
||||
defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
|
||||
ssmem, sse_load_f32,
|
||||
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
|
||||
SSEPackedSingle, sched, UseSSE1, "SS">, XS;
|
||||
SSEPackedSingle, sched.Scl, UseSSE1, "SS">, XS;
|
||||
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
|
||||
f32mem, ssmem, sse_load_f32,
|
||||
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
|
||||
SSEPackedSingle, sched, AVXTarget, "SS">, XS, VEX_4V,
|
||||
SSEPackedSingle, sched.Scl, AVXTarget, "SS">, XS, VEX_4V,
|
||||
VEX_LIG, VEX_WIG, NotMemoryFoldable;
|
||||
}
|
||||
|
||||
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86FoldableSchedWrite sched, Predicate AVXTarget> {
|
||||
X86SchedWriteWidths sched, Predicate AVXTarget> {
|
||||
defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
|
||||
sdmem, sse_load_f64,
|
||||
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
|
||||
OpNode, SSEPackedDouble, sched, UseSSE2, "SD">, XD;
|
||||
OpNode, SSEPackedDouble, sched.Scl, UseSSE2, "SD">, XD;
|
||||
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
|
||||
f64mem, sdmem, sse_load_f64,
|
||||
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
|
||||
OpNode, SSEPackedDouble, sched, AVXTarget, "SD">,
|
||||
OpNode, SSEPackedDouble, sched.Scl, AVXTarget, "SD">,
|
||||
XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
|
||||
}
|
||||
|
||||
// Square root.
|
||||
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, WriteFSqrt, UseAVX>,
|
||||
sse1_fp_unop_p<0x51, "sqrt", fsqrt, WriteFSqrt, [HasAVX, NoVLX]>,
|
||||
sse2_fp_unop_s<0x51, "sqrt", fsqrt, WriteFSqrt, UseAVX>,
|
||||
sse2_fp_unop_p<0x51, "sqrt", fsqrt, WriteFSqrt>;
|
||||
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>,
|
||||
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
|
||||
sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>,
|
||||
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt>;
|
||||
|
||||
// Reciprocal approximations. Note that these typically require refinement
|
||||
// in order to obtain suitable precision.
|
||||
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, WriteFRsqrt, HasAVX>,
|
||||
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, WriteFRsqrt, [HasAVX]>;
|
||||
defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, WriteFRcp, HasAVX>,
|
||||
sse1_fp_unop_p<0x53, "rcp", X86frcp, WriteFRcp, [HasAVX]>;
|
||||
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
|
||||
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>;
|
||||
defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
|
||||
sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>;
|
||||
|
||||
// There is no f64 version of the reciprocal approximation instructions.
|
||||
|
||||
|
|
|
@ -161,9 +161,12 @@ defm : BWWriteResPair<WriteFCmpY, [BWPort1], 3, [1], 1, 7>; // Floating point c
|
|||
defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
|
||||
defm : BWWriteResPair<WriteFMul, [BWPort0], 5>; // Floating point multiplication.
|
||||
defm : BWWriteResPair<WriteFDiv, [BWPort0], 12>; // 10-14 cycles. // Floating point division.
|
||||
defm : BWWriteResPair<WriteFSqrt, [BWPort0], 15>; // Floating point square root.
|
||||
defm : BWWriteResPair<WriteFRcp, [BWPort0], 5>; // Floating point reciprocal estimate.
|
||||
defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5>; // Floating point reciprocal square root estimate.
|
||||
defm : BWWriteResPair<WriteFSqrt, [BWPort0], 15, [1], 1, 5>; // Floating point square root.
|
||||
defm : BWWriteResPair<WriteFSqrtY, [BWPort0], 15, [1], 1, 7>; // Floating point square root (YMM/ZMM).
|
||||
defm : BWWriteResPair<WriteFRcp, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate.
|
||||
defm : BWWriteResPair<WriteFRcpY, [BWPort0], 5, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM).
|
||||
defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate.
|
||||
defm : BWWriteResPair<WriteFRsqrtY,[BWPort0], 5, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM).
|
||||
defm : BWWriteResPair<WriteFMA, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add.
|
||||
defm : BWWriteResPair<WriteFMAS, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (Scalar).
|
||||
defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM).
|
||||
|
|
|
@ -155,9 +155,12 @@ defm : HWWriteResPair<WriteFCmpY, [HWPort1], 3, [1], 1, 7>;
|
|||
defm : HWWriteResPair<WriteFCom, [HWPort1], 3>;
|
||||
defm : HWWriteResPair<WriteFMul, [HWPort0], 5>;
|
||||
defm : HWWriteResPair<WriteFDiv, [HWPort0], 12>; // 10-14 cycles.
|
||||
defm : HWWriteResPair<WriteFRcp, [HWPort0], 5>;
|
||||
defm : HWWriteResPair<WriteFRsqrt, [HWPort0], 5>;
|
||||
defm : HWWriteResPair<WriteFSqrt, [HWPort0], 15>;
|
||||
defm : HWWriteResPair<WriteFRcp, [HWPort0], 5, [1], 1, 5>;
|
||||
defm : HWWriteResPair<WriteFRcpY, [HWPort0], 5, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteFRsqrt, [HWPort0], 5, [1], 1, 5>;
|
||||
defm : HWWriteResPair<WriteFRsqrtY,[HWPort0], 5, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteFSqrt, [HWPort0], 15, [1], 1, 5>;
|
||||
defm : HWWriteResPair<WriteFSqrtY, [HWPort0], 15, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteCvtF2I, [HWPort1], 3>;
|
||||
defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>;
|
||||
defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>;
|
||||
|
|
|
@ -146,8 +146,11 @@ defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
|
|||
defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteFDiv, [SBPort0], 24>;
|
||||
defm : SBWriteResPair<WriteFRcp, [SBPort0], 5, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteFRcpY, [SBPort0], 5, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteFRsqrt, [SBPort0], 5, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteFSqrt, [SBPort0], 14>;
|
||||
defm : SBWriteResPair<WriteFRsqrtY,[SBPort0], 5, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteFSqrt, [SBPort0], 14, [1], 1, 5>;
|
||||
defm : SBWriteResPair<WriteFSqrtY, [SBPort0], 14, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
|
||||
defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
|
||||
defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
|
||||
|
@ -1525,6 +1528,7 @@ def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> {
|
|||
let ResourceCycles = [2,1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup114], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
|
||||
|
||||
def SBWriteResGroup116 : SchedWriteRes<[SBPort0,SBFPDivider]> {
|
||||
let Latency = 14;
|
||||
let NumMicroOps = 1;
|
||||
|
|
|
@ -158,9 +158,12 @@ defm : SKLWriteResPair<WriteFCmpY, [SKLPort01], 4, [1], 1, 7>; // Floating point
|
|||
defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
|
||||
defm : SKLWriteResPair<WriteFMul, [SKLPort0], 5>; // Floating point multiplication.
|
||||
defm : SKLWriteResPair<WriteFDiv, [SKLPort0], 12>; // 10-14 cycles. // Floating point division.
|
||||
defm : SKLWriteResPair<WriteFSqrt, [SKLPort0], 15>; // Floating point square root.
|
||||
defm : SKLWriteResPair<WriteFRcp, [SKLPort0], 4>; // Floating point reciprocal estimate.
|
||||
defm : SKLWriteResPair<WriteFRsqrt, [SKLPort0], 4>; // Floating point reciprocal square root estimate.
|
||||
defm : SKLWriteResPair<WriteFSqrt, [SKLPort0], 15, [1], 1, 5>; // Floating point square root.
|
||||
defm : SKLWriteResPair<WriteFSqrtY, [SKLPort0], 15, [1], 1, 7>; // Floating point square root (YMM/ZMM).
|
||||
defm : SKLWriteResPair<WriteFRcp, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
|
||||
defm : SKLWriteResPair<WriteFRcpY, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate (YMM/ZMM).
|
||||
defm : SKLWriteResPair<WriteFRsqrt, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
|
||||
defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate (YMM/ZMM).
|
||||
defm : SKLWriteResPair<WriteFMA, [SKLPort01], 4, [1], 1, 6>; // Fused Multiply Add.
|
||||
defm : SKLWriteResPair<WriteFMAS, [SKLPort01], 4, [1], 1, 5>; // Fused Multiply Add (Scalar).
|
||||
defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
|
||||
|
|
|
@ -158,9 +158,12 @@ defm : SKXWriteResPair<WriteFCmpY,[SKXPort015], 4, [1], 1, 7>; // Floating point
|
|||
defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
|
||||
defm : SKXWriteResPair<WriteFMul, [SKXPort015], 4, [1], 1, 6>; // Floating point multiplication.
|
||||
defm : SKXWriteResPair<WriteFDiv, [SKXPort0], 12>; // 10-14 cycles. // Floating point division.
|
||||
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0], 15>; // Floating point square root.
|
||||
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0], 15, [1], 1, 5>; // Floating point square root.
|
||||
defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0], 15, [1], 1, 7>; // Floating point square root (YMM/ZMM).
|
||||
defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 4, [1], 1, 6>; // Floating point reciprocal estimate.
|
||||
defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM).
|
||||
defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 4, [1], 1, 6>; // Floating point reciprocal square root estimate.
|
||||
defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM).
|
||||
defm : SKXWriteResPair<WriteFMA, [SKXPort015], 4, [1], 1, 6>; // Fused Multiply Add.
|
||||
defm : SKXWriteResPair<WriteFMAS, [SKXPort015], 4, [1], 1, 5>; // Fused Multiply Add (Scalar).
|
||||
defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
|
||||
|
@ -3622,13 +3625,7 @@ def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F(32|64)m",
|
||||
"VRCP14PDZ256m(b?)",
|
||||
"VRCP14PSZ256m(b?)",
|
||||
"VRCPPSYm",
|
||||
"VRSQRT14PDZ256m(b?)",
|
||||
"VRSQRT14PSZ256m(b?)",
|
||||
"VRSQRTPSYm")>;
|
||||
def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F(32|64)m")>;
|
||||
|
||||
def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort015]> {
|
||||
let Latency = 11;
|
||||
|
|
|
@ -98,8 +98,11 @@ defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
|
|||
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
|
||||
defm WriteFDiv : X86SchedWritePair; // Floating point division.
|
||||
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
|
||||
defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM/ZMM).
|
||||
defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
|
||||
defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM/ZMM).
|
||||
defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
|
||||
defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM/ZMM).
|
||||
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
|
||||
defm WriteFMAS : X86SchedWritePair; // Fused Multiply Add (Scalar).
|
||||
defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM).
|
||||
|
@ -210,10 +213,12 @@ def SchedWriteFMul
|
|||
: X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMul, WriteFMul>;
|
||||
def SchedWriteFDiv
|
||||
: X86SchedWriteWidths<WriteFDiv, WriteFDiv, WriteFDiv, WriteFDiv>;
|
||||
def SchedWriteFSqrt
|
||||
: X86SchedWriteWidths<WriteFSqrt, WriteFSqrt, WriteFSqrtY, WriteFSqrtY>;
|
||||
def SchedWriteFRcp
|
||||
: X86SchedWriteWidths<WriteFRcp, WriteFRcp, WriteFRcp, WriteFRcp>;
|
||||
: X86SchedWriteWidths<WriteFRcp, WriteFRcp, WriteFRcpY, WriteFRcpY>;
|
||||
def SchedWriteFRsqrt
|
||||
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrt, WriteFRsqrt, WriteFRsqrt>;
|
||||
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrt, WriteFRsqrtY, WriteFRsqrtY>;
|
||||
def SchedWriteFLogic
|
||||
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicY>;
|
||||
|
||||
|
|
|
@ -209,9 +209,12 @@ defm : AtomWriteResPair<WriteFCmpY, [AtomPort0], [AtomPort0], 5, 5,
|
|||
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
|
||||
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
|
||||
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
|
||||
defm : AtomWriteResPair<WriteFRcpY, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
|
||||
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
|
||||
defm : AtomWriteResPair<WriteFRsqrtY, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
|
||||
defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
|
||||
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
|
||||
defm : AtomWriteResPair<WriteFSqrtY, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
|
||||
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
|
||||
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
|
||||
defm : AtomWriteResPair<WriteFLogicY, [AtomPort01], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
|
||||
|
|
|
@ -325,9 +325,12 @@ defm : JWriteResFpuPair<WriteFMA, [JFPU1, JFPM], 2>; // NOTE: Doesn't
|
|||
defm : JWriteResFpuPair<WriteFMAS, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar.
|
||||
defm : JWriteResFpuPair<WriteFMAY, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar.
|
||||
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>;
|
||||
defm : JWriteResYMMPair<WriteFRcpY, [JFPU1, JFPM], 2, [2,2], 2>;
|
||||
defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>;
|
||||
defm : JWriteResYMMPair<WriteFRsqrtY, [JFPU1, JFPM], 2, [2,2], 2>;
|
||||
defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>;
|
||||
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
|
||||
defm : JWriteResYMMPair<WriteFSqrtY, [JFPU1, JFPM], 42, [2, 42], 2>;
|
||||
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
|
||||
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
|
||||
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
|
||||
|
@ -587,14 +590,14 @@ def JWriteVMULYPS: SchedWriteRes<[JFPU1, JFPM]> {
|
|||
let ResourceCycles = [2, 2];
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[JWriteVMULYPS], (instrs VMULPSYrr, VRCPPSYr, VRSQRTPSYr)>;
|
||||
def : InstRW<[JWriteVMULYPS], (instrs VMULPSYrr)>;
|
||||
|
||||
def JWriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
|
||||
let Latency = 7;
|
||||
let ResourceCycles = [2, 2, 2];
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[JWriteVMULYPSLd, ReadAfterLd], (instrs VMULPSYrm, VRCPPSYm, VRSQRTPSYm)>;
|
||||
def : InstRW<[JWriteVMULYPSLd, ReadAfterLd], (instrs VMULPSYrm)>;
|
||||
|
||||
def JWriteVMULPD: SchedWriteRes<[JFPU1, JFPM]> {
|
||||
let Latency = 4;
|
||||
|
@ -744,20 +747,6 @@ def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
|
|||
}
|
||||
def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>;
|
||||
|
||||
def JWriteVSQRTYPS: SchedWriteRes<[JFPU1, JFPM]> {
|
||||
let Latency = 42;
|
||||
let ResourceCycles = [2, 42];
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[JWriteVSQRTYPS], (instrs VSQRTPSYr)>;
|
||||
|
||||
def JWriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
|
||||
let Latency = 47;
|
||||
let ResourceCycles = [2, 2, 42];
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[JWriteVSQRTYPSLd], (instrs VSQRTPSYm)>;
|
||||
|
||||
def JWriteJVZEROALL: SchedWriteRes<[]> {
|
||||
let Latency = 90;
|
||||
let NumMicroOps = 73;
|
||||
|
|
|
@ -137,8 +137,11 @@ defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
|
|||
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
|
||||
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
|
||||
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
|
||||
defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
|
||||
defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
|
||||
defm : SLMWriteResPair<WriteFRsqrtY,[SLM_FPC_RSV0], 5>;
|
||||
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0], 15>;
|
||||
defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0], 15>;
|
||||
defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>;
|
||||
defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
|
||||
defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
|
||||
|
|
|
@ -219,8 +219,11 @@ defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>;
|
|||
defm : ZnWriteResFpuPair<WriteFMAS, [ZnFPU03], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU03], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU01], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20>;
|
||||
defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 20>;
|
||||
def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
|
||||
|
||||
// Vector integer operations which uses FPU units
|
||||
|
|
Loading…
Reference in New Issue