forked from OSchip/llvm-project
[X86] Add vector element insertion/extraction scheduler classes
Split off pinsr/pextr and extractps instructions.

(Mostly) fixes PR36887.

Note: It might be worth adding a WriteFInsertLd class as well in the future.

Differential Revision: https://reviews.llvm.org/D45929

llvm-svn: 330714
This commit is contained in:
parent
2922c102b3
commit
f7d2a93d5f
|
@ -1085,14 +1085,14 @@ def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
|
|||
(ins VR128X:$src1, u8imm:$src2),
|
||||
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
|
||||
EVEX, VEX_WIG, Sched<[WriteFBlend]>;
|
||||
EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
|
||||
|
||||
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
|
||||
(ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
|
||||
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
|
||||
addr:$dst)]>,
|
||||
EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFBlendLd, WriteRMW]>;
|
||||
EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// AVX-512 BROADCAST
|
||||
|
@ -9878,7 +9878,7 @@ multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
|
||||
addr:$dst)]>,
|
||||
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, WriteRMW]>;
|
||||
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
|
||||
}
|
||||
|
||||
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
|
||||
|
@ -9888,7 +9888,7 @@ multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
|
|||
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set GR32orGR64:$dst,
|
||||
(X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
|
||||
EVEX, TAPD, Sched<[WriteShuffle]>;
|
||||
EVEX, TAPD, Sched<[WriteVecExtract]>;
|
||||
|
||||
defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
|
||||
}
|
||||
|
@ -9901,14 +9901,14 @@ multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
|
|||
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set GR32orGR64:$dst,
|
||||
(X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
|
||||
EVEX, PD, Sched<[WriteShuffle]>;
|
||||
EVEX, PD, Sched<[WriteVecExtract]>;
|
||||
|
||||
let hasSideEffects = 0 in
|
||||
def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
|
||||
(ins _.RC:$src1, u8imm:$src2),
|
||||
OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||
EVEX, TAPD, FoldGenData<NAME#rr>,
|
||||
Sched<[WriteShuffle]>;
|
||||
Sched<[WriteVecExtract]>;
|
||||
|
||||
defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
|
||||
}
|
||||
|
@ -9922,7 +9922,7 @@ multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
|
|||
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set GRC:$dst,
|
||||
(extractelt (_.VT _.RC:$src1), imm:$src2))]>,
|
||||
EVEX, TAPD, Sched<[WriteShuffle]>;
|
||||
EVEX, TAPD, Sched<[WriteVecExtract]>;
|
||||
|
||||
def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
|
||||
(ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
|
||||
|
@ -9930,7 +9930,7 @@ multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
|
|||
[(store (extractelt (_.VT _.RC:$src1),
|
||||
imm:$src2),addr:$dst)]>,
|
||||
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
|
||||
Sched<[WriteShuffleLd, WriteRMW]>;
|
||||
Sched<[WriteVecExtractSt]>;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9946,7 +9946,7 @@ multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[(set _.RC:$dst,
|
||||
(_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
|
||||
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, ReadAfterLd]>;
|
||||
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
|
@ -9957,7 +9957,7 @@ multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[(set _.RC:$dst,
|
||||
(OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
|
||||
Sched<[WriteShuffle]>;
|
||||
Sched<[WriteVecInsert]>;
|
||||
|
||||
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
|
||||
}
|
||||
|
@ -9971,7 +9971,7 @@ multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
|
|||
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[(set _.RC:$dst,
|
||||
(_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
|
||||
EVEX_4V, TAPD, Sched<[WriteShuffle]>;
|
||||
EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
|
||||
|
||||
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
|
||||
_.ScalarLdFrag>, TAPD;
|
||||
|
|
|
@ -528,7 +528,7 @@ def MMX_PEXTRWrr: MMXIi8<0xC5, MRMSrcReg,
|
|||
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1,
|
||||
imm:$src2))]>,
|
||||
Sched<[WriteShuffle]>;
|
||||
Sched<[WriteVecExtract]>;
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
let Predicates = [HasSSE1] in {
|
||||
def MMX_PINSRWrr : MMXIi8<0xC4, MRMSrcReg,
|
||||
|
@ -537,7 +537,7 @@ let Predicates = [HasSSE1] in {
|
|||
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
|
||||
GR32orGR64:$src2, imm:$src3))]>,
|
||||
Sched<[WriteShuffle]>;
|
||||
Sched<[WriteVecInsert]>;
|
||||
|
||||
def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem,
|
||||
(outs VR64:$dst),
|
||||
|
@ -546,7 +546,7 @@ let Predicates = [HasSSE1] in {
|
|||
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
|
||||
(i32 (anyext (loadi16 addr:$src2))),
|
||||
imm:$src3))]>,
|
||||
Sched<[WriteShuffleLd, ReadAfterLd]>;
|
||||
Sched<[WriteVecInsertLd, ReadAfterLd]>;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3782,7 +3782,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
|
|||
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
|
||||
Sched<[WriteShuffle]>;
|
||||
Sched<[WriteVecInsert]>;
|
||||
def rm : Ii8<0xC4, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1,
|
||||
i16mem:$src2, u8imm:$src3),
|
||||
|
@ -3792,7 +3792,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
|
|||
[(set VR128:$dst,
|
||||
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
|
||||
imm:$src3))]>,
|
||||
Sched<[WriteShuffleLd, ReadAfterLd]>;
|
||||
Sched<[WriteVecInsertLd, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
// Extract
|
||||
|
@ -3802,13 +3802,13 @@ def VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
|
|||
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
|
||||
imm:$src2))]>,
|
||||
PD, VEX, Sched<[WriteShuffle]>;
|
||||
PD, VEX, Sched<[WriteVecExtract]>;
|
||||
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
|
||||
(outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
|
||||
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
|
||||
imm:$src2))]>,
|
||||
Sched<[WriteShuffle]>;
|
||||
Sched<[WriteVecExtract]>;
|
||||
|
||||
// Insert
|
||||
let Predicates = [HasAVX, NoBWI] in
|
||||
|
@ -5085,15 +5085,14 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
|
|||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
|
||||
imm:$src2))]>,
|
||||
Sched<[WriteShuffle]>;
|
||||
let hasSideEffects = 0, mayStore = 1,
|
||||
SchedRW = [WriteShuffleLd, WriteRMW] in
|
||||
Sched<[WriteVecExtract]>;
|
||||
let hasSideEffects = 0, mayStore = 1 in
|
||||
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
||||
(ins i8mem:$dst, VR128:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))),
|
||||
addr:$dst)]>;
|
||||
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoBWI] in
|
||||
|
@ -5109,16 +5108,15 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
|
|||
(ins VR128:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
|
||||
Sched<[WriteShuffle]>, FoldGenData<NAME#ri>;
|
||||
Sched<[WriteVecExtract]>, FoldGenData<NAME#ri>;
|
||||
|
||||
let hasSideEffects = 0, mayStore = 1,
|
||||
SchedRW = [WriteShuffleLd, WriteRMW] in
|
||||
let hasSideEffects = 0, mayStore = 1 in
|
||||
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
||||
(ins i16mem:$dst, VR128:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))),
|
||||
addr:$dst)]>;
|
||||
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoBWI] in
|
||||
|
@ -5135,14 +5133,13 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
|
|||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set GR32:$dst,
|
||||
(extractelt (v4i32 VR128:$src1), imm:$src2))]>,
|
||||
Sched<[WriteShuffle]>;
|
||||
let SchedRW = [WriteShuffleLd, WriteRMW] in
|
||||
Sched<[WriteVecExtract]>;
|
||||
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
||||
(ins i32mem:$dst, VR128:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(store (extractelt (v4i32 VR128:$src1), imm:$src2),
|
||||
addr:$dst)]>;
|
||||
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoDQI] in
|
||||
|
@ -5158,14 +5155,13 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
|
|||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set GR64:$dst,
|
||||
(extractelt (v2i64 VR128:$src1), imm:$src2))]>,
|
||||
Sched<[WriteShuffle]>;
|
||||
let SchedRW = [WriteShuffleLd, WriteRMW] in
|
||||
Sched<[WriteVecExtract]>;
|
||||
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
||||
(ins i64mem:$dst, VR128:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(store (extractelt (v2i64 VR128:$src1), imm:$src2),
|
||||
addr:$dst)]>;
|
||||
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoDQI] in
|
||||
|
@ -5182,14 +5178,13 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
|
|||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set GR32orGR64:$dst,
|
||||
(extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
|
||||
Sched<[WriteFBlend]>;
|
||||
let SchedRW = [WriteFBlendLd, WriteRMW] in
|
||||
Sched<[WriteVecExtract]>;
|
||||
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
||||
(ins f32mem:$dst, VR128:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
|
||||
addr:$dst)]>;
|
||||
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
|
@ -5223,7 +5218,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
|
|||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
|
||||
Sched<[WriteShuffle]>;
|
||||
Sched<[WriteVecInsert]>;
|
||||
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i8mem:$src2, u8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
|
@ -5232,7 +5227,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
|
|||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
|
||||
imm:$src3))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
|
||||
imm:$src3))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoBWI] in
|
||||
|
@ -5249,7 +5244,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
|
|||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
|
||||
Sched<[WriteShuffle]>;
|
||||
Sched<[WriteVecInsert]>;
|
||||
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i32mem:$src2, u8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
|
@ -5258,7 +5253,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
|
|||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
|
||||
imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
|
||||
imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoDQI] in
|
||||
|
@ -5275,7 +5270,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
|
|||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
|
||||
Sched<[WriteShuffle]>;
|
||||
Sched<[WriteVecInsert]>;
|
||||
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i64mem:$src2, u8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
|
@ -5284,7 +5279,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
|
|||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
|
||||
imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
|
||||
imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoDQI] in
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
def BroadwellModel : SchedMachineModel {
|
||||
// All x86 instructions are modeled as a single micro-op, and HW can decode 4
|
||||
// All x86 instructions are modeled as a single micro-op, and BW can decode 4
|
||||
// instructions per cycle.
|
||||
let IssueWidth = 4;
|
||||
let MicroOpBufferSize = 192; // Based on the reorder buffer.
|
||||
|
@ -190,6 +190,26 @@ defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variab
|
|||
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
|
||||
defm : BWWriteResPair<WritePSADBW, [BWPort0], 5>; // Vector PSADBW.
|
||||
|
||||
// Vector insert/extract operations.
|
||||
def : WriteRes<WriteVecInsert, [BWPort5]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : WriteRes<WriteVecInsertLd, [BWPort5,BWPort23]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def : WriteRes<WriteVecExtract, [BWPort0,BWPort5]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : WriteRes<WriteVecExtractSt, [BWPort4,BWPort5,BWPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
|
||||
// Conversion between integer and float.
|
||||
defm : BWWriteResPair<WriteCvtF2I, [BWPort1], 3>; // Float -> Integer.
|
||||
defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
|
||||
|
@ -462,17 +482,6 @@ def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm",
|
|||
"(V?)MOVUPD(Y?)mr",
|
||||
"(V?)MOVUPS(Y?)mr")>;
|
||||
|
||||
def BWWriteResGroup11 : SchedWriteRes<[BWPort5]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup11], (instregex "MMX_PINSRWrr",
|
||||
"(V?)PINSRBrr",
|
||||
"(V?)PINSRDrr",
|
||||
"(V?)PINSRQrr",
|
||||
"(V?)PINSRWrr")>;
|
||||
|
||||
def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
|
@ -505,15 +514,9 @@ def BWWriteResGroup15 : SchedWriteRes<[BWPort0,BWPort5]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup15], (instregex "MMX_PEXTRWrr",
|
||||
"VCVTPH2PS(Y?)rr",
|
||||
def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PS(Y?)rr",
|
||||
"(V?)CVTPS2PDrr",
|
||||
"(V?)CVTSS2SDrr",
|
||||
"(V?)EXTRACTPSrr",
|
||||
"(V?)PEXTRBrr",
|
||||
"(V?)PEXTRDrr",
|
||||
"(V?)PEXTRQrr",
|
||||
"(V?)PEXTRWrr",
|
||||
"(V?)PSLLDrr",
|
||||
"(V?)PSLLQrr",
|
||||
"(V?)PSLLWrr",
|
||||
|
@ -573,17 +576,6 @@ def: InstRW<[BWWriteResGroup20], (instregex "ADC8i8",
|
|||
"SBB8ri",
|
||||
"SET(A|BE)r")>;
|
||||
|
||||
def BWWriteResGroup21 : SchedWriteRes<[BWPort4,BWPort5,BWPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,1,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup21], (instregex "(V?)EXTRACTPSmr",
|
||||
"(V?)PEXTRBmr",
|
||||
"(V?)PEXTRDmr",
|
||||
"(V?)PEXTRQmr",
|
||||
"(V?)PEXTRWmr")>;
|
||||
|
||||
def BWWriteResGroup22 : SchedWriteRes<[BWPort4,BWPort6,BWPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 3;
|
||||
|
|
|
@ -189,6 +189,26 @@ defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>;
|
|||
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
|
||||
defm : HWWriteResPair<WritePSADBW, [HWPort0], 5>;
|
||||
|
||||
// Vector insert/extract operations.
|
||||
def : WriteRes<WriteVecInsert, [HWPort5]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : WriteRes<WriteVecInsertLd, [HWPort5,HWPort23]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def : WriteRes<WriteVecExtract, [HWPort0,HWPort5]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : WriteRes<WriteVecExtractSt, [HWPort4,HWPort5,HWPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
|
||||
// String instructions.
|
||||
|
||||
// Packed Compare Implicit Length Strings, Return Mask
|
||||
|
@ -1092,17 +1112,6 @@ def HWWriteResGroup19 : SchedWriteRes<[HWPort237,HWPort0156]> {
|
|||
}
|
||||
def: InstRW<[HWWriteResGroup19], (instregex "SFENCE")>;
|
||||
|
||||
def HWWriteResGroup20 : SchedWriteRes<[HWPort4,HWPort5,HWPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,1,1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup20], (instregex "(V?)EXTRACTPSmr",
|
||||
"(V?)PEXTRBmr",
|
||||
"(V?)PEXTRDmr",
|
||||
"(V?)PEXTRQmr",
|
||||
"(V?)PEXTRWmr")>;
|
||||
|
||||
def HWWriteResGroup21 : SchedWriteRes<[HWPort4,HWPort6,HWPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 3;
|
||||
|
@ -1160,17 +1169,6 @@ def HWWriteResGroup26 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
|
|||
def: InstRW<[HWWriteResGroup26], (instregex "POP(16|32|64)rmm",
|
||||
"PUSH(16|32|64)rmm")>;
|
||||
|
||||
def HWWriteResGroup27 : SchedWriteRes<[HWPort5]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup27], (instregex "MMX_PINSRWrr",
|
||||
"(V?)PINSRBrr",
|
||||
"(V?)PINSRDrr",
|
||||
"(V?)PINSRQrr",
|
||||
"(V?)PINSRWrr")>;
|
||||
|
||||
def HWWriteResGroup28 : SchedWriteRes<[HWPort01]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
|
@ -1203,16 +1201,10 @@ def HWWriteResGroup31 : SchedWriteRes<[HWPort0,HWPort5]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup31], (instregex "MMX_PEXTRWrr",
|
||||
"VCVTPH2PSYrr",
|
||||
def: InstRW<[HWWriteResGroup31], (instregex "VCVTPH2PSYrr",
|
||||
"VCVTPH2PSrr",
|
||||
"(V?)CVTPS2PDrr",
|
||||
"(V?)CVTSS2SDrr",
|
||||
"(V?)EXTRACTPSrr",
|
||||
"(V?)PEXTRBrr",
|
||||
"(V?)PEXTRDrr",
|
||||
"(V?)PEXTRQrr",
|
||||
"(V?)PEXTRWrr",
|
||||
"(V?)PSLLDrr",
|
||||
"(V?)PSLLQrr",
|
||||
"(V?)PSLLWrr",
|
||||
|
|
|
@ -173,6 +173,25 @@ defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
|
|||
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
|
||||
defm : SBWriteResPair<WritePSADBW, [SBPort0], 5>;
|
||||
|
||||
// Vector insert/extract operations.
|
||||
def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : WriteRes<WriteVecInsertLd, [SBPort23,SBPort15]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def : WriteRes<WriteVecExtract, [SBPort0,SBPort15]> {
|
||||
let Latency = 3;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : WriteRes<WriteVecExtractSt, [SBPort4,SBPort23,SBPort15]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Horizontal add/sub instructions.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -535,16 +554,6 @@ def SBWriteResGroup16_1 : SchedWriteRes<[SBPort1]> {
|
|||
}
|
||||
def: InstRW<[SBWriteResGroup16_1], (instrs BSWAP32r)>;
|
||||
|
||||
def SBWriteResGroup17 : SchedWriteRes<[SBPort5,SBPort15]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup17], (instregex "(V?)PINSRBrr",
|
||||
"(V?)PINSRDrr",
|
||||
"(V?)PINSRQrr",
|
||||
"(V?)PINSRWrr")>;
|
||||
|
||||
def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
|
@ -590,16 +599,6 @@ def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
|
|||
}
|
||||
def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>;
|
||||
|
||||
def SBWriteResGroup23 : SchedWriteRes<[SBPort0,SBPort15]> {
|
||||
let Latency = 3;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup23], (instregex "(V?)PEXTRBrr",
|
||||
"(V?)PEXTRDrr",
|
||||
"(V?)PEXTRQrr",
|
||||
"(V?)PEXTRWrr")>;
|
||||
|
||||
def SBWriteResGroup23_2 : SchedWriteRes<[SBPort05]> {
|
||||
let Latency = 3;
|
||||
let NumMicroOps = 3;
|
||||
|
@ -793,15 +792,6 @@ def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> {
|
|||
def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPD(Y?)mr",
|
||||
"VMASKMOVPS(Y?)mr")>;
|
||||
|
||||
def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,1,1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup39], (instregex "(V?)PEXTRBmr",
|
||||
"VPEXTRDmr",
|
||||
"VPEXTRWmr")>;
|
||||
|
||||
def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 3;
|
||||
|
@ -1009,10 +999,6 @@ def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm",
|
|||
"(V?)PCMPGTBrm",
|
||||
"(V?)PCMPGTDrm",
|
||||
"(V?)PCMPGTWrm",
|
||||
"(V?)PINSRBrm",
|
||||
"(V?)PINSRDrm",
|
||||
"(V?)PINSRQrm",
|
||||
"(V?)PINSRWrm",
|
||||
"(V?)PMAXSBrm",
|
||||
"(V?)PMAXSDrm",
|
||||
"(V?)PMAXSWrm",
|
||||
|
|
|
@ -187,6 +187,26 @@ defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector va
|
|||
defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
|
||||
defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3>; // Vector PSADBW.
|
||||
|
||||
// Vector insert/extract operations.
|
||||
def : WriteRes<WriteVecInsert, [SKLPort5]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : WriteRes<WriteVecInsertLd, [SKLPort5,SKLPort23]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def : WriteRes<WriteVecExtract, [SKLPort0,SKLPort5]> {
|
||||
let Latency = 3;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : WriteRes<WriteVecExtractSt, [SKLPort4,SKLPort5,SKLPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
|
||||
// Conversion between integer and float.
|
||||
defm : SKLWriteResPair<WriteCvtF2I, [SKLPort1], 3>; // Float -> Integer.
|
||||
defm : SKLWriteResPair<WriteCvtI2F, [SKLPort1], 4>; // Integer -> Float.
|
||||
|
@ -571,12 +591,7 @@ def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr",
|
||||
"MMX_PINSRWrr",
|
||||
"(V?)PINSRBrr",
|
||||
"(V?)PINSRDrr",
|
||||
"(V?)PINSRQrr",
|
||||
"(V?)PINSRWrr")>;
|
||||
def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>;
|
||||
|
||||
def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> {
|
||||
let Latency = 2;
|
||||
|
@ -671,17 +686,6 @@ def: InstRW<[SKLWriteResGroup23], (instregex "ADC8i8",
|
|||
"SBB8i8",
|
||||
"SBB8ri")>;
|
||||
|
||||
def SKLWriteResGroup24 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,1,1];
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup24], (instregex "(V?)EXTRACTPSmr",
|
||||
"(V?)PEXTRBmr",
|
||||
"(V?)PEXTRDmr",
|
||||
"(V?)PEXTRQmr",
|
||||
"(V?)PEXTRWmr")>;
|
||||
|
||||
def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 3;
|
||||
|
@ -761,13 +765,7 @@ def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0,SKLPort5]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup31], (instregex "MMX_PEXTRWrr",
|
||||
"(V?)EXTRACTPSrr",
|
||||
"(V?)PEXTRBrr",
|
||||
"(V?)PEXTRDrr",
|
||||
"(V?)PEXTRQrr",
|
||||
"(V?)PEXTRWrr",
|
||||
"(V?)PTEST(Y?)rr")>;
|
||||
def: InstRW<[SKLWriteResGroup31], (instregex "(V?)PTEST(Y?)rr")>;
|
||||
|
||||
def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
|
||||
let Latency = 3;
|
||||
|
|
|
@ -187,6 +187,26 @@ defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector var
|
|||
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
|
||||
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1,1], 1, 6>; // Vector PSADBW.
|
||||
|
||||
// Vector insert/extract operations.
|
||||
def : WriteRes<WriteVecInsert, [SKXPort5]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : WriteRes<WriteVecInsertLd, [SKXPort5,SKXPort23]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def : WriteRes<WriteVecExtract, [SKXPort0,SKXPort5]> {
|
||||
let Latency = 3;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : WriteRes<WriteVecExtractSt, [SKXPort4,SKXPort5,SKXPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
|
||||
// Conversion between integer and float.
|
||||
defm : SKXWriteResPair<WriteCvtF2I, [SKXPort1], 3>; // Float -> Integer.
|
||||
defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>; // Integer -> Float.
|
||||
|
@ -1035,20 +1055,7 @@ def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup13], (instregex "MMX_MOVQ2DQrr",
|
||||
"MMX_PINSRWrr",
|
||||
"PINSRBrr",
|
||||
"PINSRDrr",
|
||||
"PINSRQrr",
|
||||
"PINSRWrr",
|
||||
"VPINSRBZrr",
|
||||
"VPINSRBrr",
|
||||
"VPINSRDZrr",
|
||||
"VPINSRDrr",
|
||||
"VPINSRQZrr",
|
||||
"VPINSRQrr",
|
||||
"VPINSRWZrr",
|
||||
"VPINSRWrr")>;
|
||||
def: InstRW<[SKXWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>;
|
||||
|
||||
def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
|
||||
let Latency = 2;
|
||||
|
@ -1163,27 +1170,6 @@ def: InstRW<[SKXWriteResGroup23], (instregex "ADC8i8",
|
|||
"SBB8i8",
|
||||
"SBB8ri")>;
|
||||
|
||||
def SKXWriteResGroup24 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,1,1];
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup24], (instregex "EXTRACTPSmr",
|
||||
"PEXTRBmr",
|
||||
"PEXTRDmr",
|
||||
"PEXTRQmr",
|
||||
"PEXTRWmr",
|
||||
"VEXTRACTPSZmr(b?)",
|
||||
"VEXTRACTPSmr",
|
||||
"VPEXTRBZmr(b?)",
|
||||
"VPEXTRBmr",
|
||||
"VPEXTRDZmr(b?)",
|
||||
"VPEXTRDmr",
|
||||
"VPEXTRQZmr(b?)",
|
||||
"VPEXTRQmr",
|
||||
"VPEXTRWZmr(b?)",
|
||||
"VPEXTRWmr")>;
|
||||
|
||||
def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 3;
|
||||
|
@ -1455,25 +1441,7 @@ def SKXWriteResGroup33 : SchedWriteRes<[SKXPort0,SKXPort5]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup33], (instregex "EXTRACTPSrr",
|
||||
"MMX_PEXTRWrr",
|
||||
"PEXTRBrr",
|
||||
"PEXTRDrr",
|
||||
"PEXTRQrr",
|
||||
"PEXTRWrr",
|
||||
"PTESTrr",
|
||||
"VEXTRACTPSZrr",
|
||||
"VEXTRACTPSrr",
|
||||
"VPEXTRBZrr",
|
||||
"VPEXTRBrr",
|
||||
"VPEXTRDZrr",
|
||||
"VPEXTRDrr",
|
||||
"VPEXTRQZrr",
|
||||
"VPEXTRQrr",
|
||||
"VPEXTRWZrr",
|
||||
"VPEXTRWrr",
|
||||
"VPTESTYrr",
|
||||
"VPTESTrr")>;
|
||||
def: InstRW<[SKXWriteResGroup33], (instregex "(V?)PTEST(Y?)rr")>;
|
||||
|
||||
def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
|
||||
let Latency = 3;
|
||||
|
|
|
@ -117,6 +117,11 @@ defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
|
|||
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
|
||||
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
|
||||
|
||||
// Vector insert/extract operations.
|
||||
defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
|
||||
def WriteVecExtract : SchedWrite; // Extract vector element to gpr.
|
||||
def WriteVecExtractSt : SchedWrite; // Extract vector element and store.
|
||||
|
||||
// MOVMSK operations.
|
||||
def WriteFMOVMSK : SchedWrite;
|
||||
def WriteVecMOVMSK : SchedWrite;
|
||||
|
|
|
@ -251,6 +251,14 @@ defm : AtomWriteResPair<WriteShuffle256, [AtomPort0], [AtomPort0]>; // NOTE:
|
|||
defm : AtomWriteResPair<WriteVarShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
|
||||
defm : AtomWriteResPair<WriteVarVecShift, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Vector insert/extract operations.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
defm : AtomWriteResPair<WriteVecInsert, [AtomPort0], [AtomPort0], 1, 1>;
|
||||
def : WriteRes<WriteVecExtract, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecExtractSt, [AtomPort0]>;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// SSE42 String instructions.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -385,23 +385,12 @@ defm : JWriteResFpuPair<WriteVarShuffle256, [JFPU01, JVALU], 1>; // NOTE: Doesn'
|
|||
defm : JWriteResFpuPair<WriteVarVecShift, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Vector Extraction instructions.
|
||||
// Vector insert/extract operations.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
def JWritePEXTR : SchedWriteRes<[JFPU0, JFPA, JALU0]> { let Latency = 3; }
|
||||
def : InstRW<[JWritePEXTR], (instrs MMX_PEXTRWrr,
|
||||
EXTRACTPSrr, VEXTRACTPSrr,
|
||||
PEXTRBrr, VPEXTRBrr,
|
||||
PEXTRDrr, VPEXTRDrr,
|
||||
PEXTRQrr, VPEXTRQrr,
|
||||
PEXTRWrr, VPEXTRWrr, PEXTRWrr_REV, VPEXTRWrr_REV)>;
|
||||
|
||||
def JWritePEXTRSt : SchedWriteRes<[JFPU1, JSTC, JSAGU]> { let Latency = 3; }
|
||||
def : InstRW<[JWritePEXTRSt], (instrs EXTRACTPSmr, VEXTRACTPSmr,
|
||||
PEXTRBmr, VPEXTRBmr,
|
||||
PEXTRDmr, VPEXTRDmr,
|
||||
PEXTRQmr, VPEXTRQmr,
|
||||
PEXTRWmr, VPEXTRWmr)>;
|
||||
defm : JWriteResFpuPair<WriteVecInsert, [JFPU01, JVALU], 1>;
|
||||
def : WriteRes<WriteVecExtract, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU]> { let Latency = 3; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// SSE42 String instructions.
|
||||
|
|
|
@ -164,6 +164,16 @@ defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
|
|||
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
|
||||
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
|
||||
|
||||
// Vector insert/extract operations.
|
||||
defm : SLMWriteResPair<WriteVecInsert, [SLM_FPC_RSV0], 1>;
|
||||
|
||||
def : WriteRes<WriteVecExtract, [SLM_FPC_RSV0]>;
|
||||
def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
|
||||
let Latency = 4;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1, 2];
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Horizontal add/sub instructions.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -233,6 +233,19 @@ defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>;
|
|||
// Vector Shift Operations
|
||||
defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
|
||||
|
||||
// Vector insert/extract operations.
|
||||
defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>;
|
||||
|
||||
def : WriteRes<WriteVecExtract, [ZnFPU12, ZnFPU2]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [1, 2];
|
||||
}
|
||||
def : WriteRes<WriteVecExtractSt, [ZnAGU, ZnFPU12, ZnFPU2]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1, 2, 3];
|
||||
}
|
||||
|
||||
// MOVMSK Instructions.
|
||||
def : WriteRes<WriteFMOVMSK, [ZnFPU2]>;
|
||||
def : WriteRes<WriteVecMOVMSK, [ZnFPU2]>;
|
||||
|
@ -987,22 +1000,6 @@ def ZnWritePMOVMSKBY : SchedWriteRes<[ZnFPU2]> {
|
|||
}
|
||||
def : InstRW<[ZnWritePMOVMSKBY], (instregex "(V|MMX_)?PMOVMSKBYrr")>;
|
||||
|
||||
// PEXTR B/W/D/Q.
|
||||
// r32,x,i.
|
||||
def ZnWritePEXTRr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [1, 2];
|
||||
}
|
||||
def : InstRW<[ZnWritePEXTRr], (instregex "(V?)PEXTR(B|W|D|Q)rr", "MMX_PEXTRWrr")>;
|
||||
|
||||
def ZnWritePEXTRm : SchedWriteRes<[ZnAGU, ZnFPU12, ZnFPU2]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1, 2, 3];
|
||||
}
|
||||
// m8,x,i.
|
||||
def : InstRW<[ZnWritePEXTRm], (instregex "(V?)PEXTR(B|W|D|Q)mr")>;
|
||||
|
||||
// VPBROADCAST B/W.
|
||||
// x, m8/16.
|
||||
def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
|
||||
|
|
|
@ -2978,7 +2978,7 @@ declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
|
|||
define i32 @test_pextrw(x86_mmx %a0) optsize {
|
||||
; GENERIC-LABEL: test_pextrw:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00]
|
||||
; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; ATOM-LABEL: test_pextrw:
|
||||
|
@ -2993,7 +2993,7 @@ define i32 @test_pextrw(x86_mmx %a0) optsize {
|
|||
;
|
||||
; SANDY-LABEL: test_pextrw:
|
||||
; SANDY: # %bb.0:
|
||||
; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00]
|
||||
; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
|
||||
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: test_pextrw:
|
||||
|
@ -3501,9 +3501,9 @@ declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
|
|||
define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
|
||||
; GENERIC-LABEL: test_pinsrw:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00]
|
||||
; GENERIC-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
|
||||
; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00]
|
||||
; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -3525,9 +3525,9 @@ define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
|
|||
;
|
||||
; SANDY-LABEL: test_pinsrw:
|
||||
; SANDY: # %bb.0:
|
||||
; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00]
|
||||
; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00]
|
||||
; SANDY-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
|
||||
; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00]
|
||||
; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00]
|
||||
; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33]
|
||||
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
|
|
@ -1903,7 +1903,7 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
|
|||
; GENERIC-LABEL: test_pextrw:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
|
||||
; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
|
||||
; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SLM-LABEL: test_pextrw:
|
||||
|
@ -1915,7 +1915,7 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
|
|||
; SANDY-SSE-LABEL: test_pextrw:
|
||||
; SANDY-SSE: # %bb.0:
|
||||
; SANDY-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
|
||||
; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
|
||||
; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
|
||||
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SANDY-LABEL: test_pextrw:
|
||||
|
|
|
@ -268,9 +268,9 @@ xorps (%rax), %xmm2
|
|||
# CHECK-NEXT: 2 8 1.00 * pavgb (%rax), %mm2
|
||||
# CHECK-NEXT: 1 3 1.00 pavgw %mm0, %mm2
|
||||
# CHECK-NEXT: 2 8 1.00 * pavgw (%rax), %mm2
|
||||
# CHECK-NEXT: 1 1 1.00 pextrw $1, %mm0, %ecx
|
||||
# CHECK-NEXT: 1 1 1.00 pinsrw $1, %eax, %mm2
|
||||
# CHECK-NEXT: 2 6 1.00 * pinsrw $1, (%rax), %mm2
|
||||
# CHECK-NEXT: 2 3 1.00 pextrw $1, %mm0, %ecx
|
||||
# CHECK-NEXT: 2 2 1.00 pinsrw $1, %eax, %mm2
|
||||
# CHECK-NEXT: 2 7 0.50 * pinsrw $1, (%rax), %mm2
|
||||
# CHECK-NEXT: 1 3 1.00 pmaxsw %mm0, %mm2
|
||||
# CHECK-NEXT: 2 8 1.00 * pmaxsw (%rax), %mm2
|
||||
# CHECK-NEXT: 1 3 1.00 pmaxub %mm0, %mm2
|
||||
|
@ -331,7 +331,7 @@ xorps (%rax), %xmm2
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
|
||||
# CHECK-NEXT: - 112.00 40.00 54.00 10.00 35.00 33.50 33.50
|
||||
# CHECK-NEXT: - 112.00 41.00 55.50 10.00 34.50 33.50 33.50
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
|
||||
|
@ -409,9 +409,9 @@ xorps (%rax), %xmm2
|
|||
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgb (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - - pavgw %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgw (%rax), %mm2
|
||||
# CHECK-NEXT: - - - - - 1.00 - - pextrw $1, %mm0, %ecx
|
||||
# CHECK-NEXT: - - - - - 1.00 - - pinsrw $1, %eax, %mm2
|
||||
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 pinsrw $1, (%rax), %mm2
|
||||
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrw $1, %mm0, %ecx
|
||||
# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrw $1, %eax, %mm2
|
||||
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pinsrw $1, (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - - pmaxsw %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pmaxsw (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - - pmaxub %mm0, %mm2
|
||||
|
|
|
@ -188,7 +188,7 @@ roundss $1, (%rax), %xmm2
|
|||
# CHECK-NEXT: 4 5 1.00 * pextrd $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: 2 3 1.00 pextrq $1, %xmm0, %rcx
|
||||
# CHECK-NEXT: 4 5 1.00 * pextrq $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: 3 6 1.00 * pextrw $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: 3 5 1.00 * pextrw $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 5 1.00 phminposuw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 11 1.00 * phminposuw (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 2 1.00 pinsrb $1, %eax, %xmm1
|
||||
|
@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
|
||||
# CHECK-NEXT: - - 26.00 47.00 5.00 53.00 25.00 25.00
|
||||
# CHECK-NEXT: - - 26.00 47.50 5.00 52.50 24.50 24.50
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
|
||||
|
@ -301,7 +301,7 @@ roundss $1, (%rax), %xmm2
|
|||
# CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 pextrd $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrq $1, %xmm0, %rcx
|
||||
# CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 pextrq $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 1.00 1.00 pextrw $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: - - - 0.50 1.00 0.50 0.50 0.50 pextrw $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: - - 1.00 - - - - - phminposuw %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 phminposuw (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrb $1, %eax, %xmm1
|
||||
|
|
Loading…
Reference in New Issue