diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index f18490fee88d..50c4e8e304ac 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1085,14 +1085,14 @@ def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, - EVEX, VEX_WIG, Sched<[WriteFBlend]>; + EVEX, VEX_WIG, Sched<[WriteVecExtract]>; def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs), (ins f32mem:$dst, VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), addr:$dst)]>, - EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFBlendLd, WriteRMW]>; + EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>; //===---------------------------------------------------------------------===// // AVX-512 BROADCAST @@ -9878,7 +9878,7 @@ multiclass avx512_extract_elt_bw_m opc, string OpcodeStr, SDNode OpNode, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))), addr:$dst)]>, - EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, WriteRMW]>; + EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>; } multiclass avx512_extract_elt_b { @@ -9888,7 +9888,7 @@ multiclass avx512_extract_elt_b { OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>, - EVEX, TAPD, Sched<[WriteShuffle]>; + EVEX, TAPD, Sched<[WriteVecExtract]>; defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD; } @@ -9901,14 +9901,14 @@ multiclass avx512_extract_elt_w { OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>, - EVEX, PD, Sched<[WriteShuffle]>; + EVEX, PD, Sched<[WriteVecExtract]>; let hasSideEffects = 0 in def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), (ins _.RC:$src1, u8imm:$src2), OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX, TAPD, FoldGenData, - Sched<[WriteShuffle]>; + Sched<[WriteVecExtract]>; defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; } @@ -9922,7 +9922,7 @@ multiclass avx512_extract_elt_dq, - EVEX, TAPD, Sched<[WriteShuffle]>; + EVEX, TAPD, Sched<[WriteVecExtract]>; def mr : AVX512Ii8<0x16, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), @@ -9930,7 +9930,7 @@ multiclass avx512_extract_elt_dq, EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD, - Sched<[WriteShuffleLd, WriteRMW]>; + Sched<[WriteVecExtractSt]>; } } @@ -9946,7 +9946,7 @@ multiclass avx512_insert_elt_m opc, string OpcodeStr, SDNode OpNode, OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>, - EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, ReadAfterLd]>; + EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>; } multiclass avx512_insert_elt_bw opc, string OpcodeStr, SDNode OpNode, @@ -9957,7 +9957,7 @@ multiclass avx512_insert_elt_bw opc, string OpcodeStr, SDNode OpNode, OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set _.RC:$dst, (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V, - Sched<[WriteShuffle]>; + Sched<[WriteVecInsert]>; defm NAME : avx512_insert_elt_m; } @@ -9971,7 +9971,7 @@ multiclass avx512_insert_elt_dq opc, string OpcodeStr, OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set _.RC:$dst, (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>, - EVEX_4V, TAPD, Sched<[WriteShuffle]>; + EVEX_4V, TAPD, Sched<[WriteVecInsert]>; defm NAME : avx512_insert_elt_m, TAPD; diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index 85e4f187d7bf..c4d7ead54885 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -528,7 +528,7 @@ def MMX_PEXTRWrr: MMXIi8<0xC5, MRMSrcReg, "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1, imm:$src2))]>, - Sched<[WriteShuffle]>; + Sched<[WriteVecExtract]>; let Constraints = "$src1 = $dst" in { let Predicates = [HasSSE1] in { def MMX_PINSRWrr : MMXIi8<0xC4, MRMSrcReg, @@ -537,7 +537,7 @@ let Predicates = [HasSSE1] in { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, GR32orGR64:$src2, imm:$src3))]>, - Sched<[WriteShuffle]>; + Sched<[WriteVecInsert]>; def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem, (outs VR64:$dst), @@ -546,7 +546,7 @@ let Predicates = [HasSSE1] in { [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, (i32 (anyext (loadi16 addr:$src2))), imm:$src3))]>, - Sched<[WriteShuffleLd, ReadAfterLd]>; + Sched<[WriteVecInsertLd, ReadAfterLd]>; } } diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 1e6cc24c9883..aff0cc942044 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3782,7 +3782,7 @@ multiclass sse2_pinsrw { "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))]>, - Sched<[WriteShuffle]>; + Sched<[WriteVecInsert]>; def rm : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, u8imm:$src3), @@ -3792,7 +3792,7 @@ multiclass sse2_pinsrw { [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), imm:$src3))]>, - Sched<[WriteShuffleLd, ReadAfterLd]>; + Sched<[WriteVecInsertLd, ReadAfterLd]>; } // Extract @@ -3802,13 +3802,13 @@ def VPEXTRWrr : Ii8<0xC5, MRMSrcReg, "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), imm:$src2))]>, - PD, VEX, Sched<[WriteShuffle]>; + PD, VEX, Sched<[WriteVecExtract]>; def PEXTRWrr : PDIi8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), imm:$src2))]>, - Sched<[WriteShuffle]>; + Sched<[WriteVecExtract]>; // Insert let Predicates = [HasAVX, NoBWI] in @@ -5085,15 +5085,14 @@ multiclass SS41I_extract8 opc, string OpcodeStr> { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>, - Sched<[WriteShuffle]>; - let hasSideEffects = 0, mayStore = 1, - SchedRW = [WriteShuffleLd, WriteRMW] in + Sched<[WriteVecExtract]>; + let hasSideEffects = 0, mayStore = 1 in def mr : SS4AIi8; + addr:$dst)]>, Sched<[WriteVecExtractSt]>; } let Predicates = [HasAVX, NoBWI] in @@ -5109,16 +5108,15 @@ multiclass SS41I_extract16 opc, string OpcodeStr> { (ins VR128:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - Sched<[WriteShuffle]>, FoldGenData; + Sched<[WriteVecExtract]>, FoldGenData; - let hasSideEffects = 0, mayStore = 1, - SchedRW = [WriteShuffleLd, WriteRMW] in + let hasSideEffects = 0, mayStore = 1 in def mr : SS4AIi8; + addr:$dst)]>, Sched<[WriteVecExtractSt]>; } let Predicates = [HasAVX, NoBWI] in @@ -5135,14 +5133,13 @@ multiclass SS41I_extract32 opc, string OpcodeStr> { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32:$dst, (extractelt (v4i32 VR128:$src1), imm:$src2))]>, - Sched<[WriteShuffle]>; - let SchedRW = [WriteShuffleLd, WriteRMW] in + Sched<[WriteVecExtract]>; def mr : SS4AIi8; + addr:$dst)]>, Sched<[WriteVecExtractSt]>; } let Predicates = [HasAVX, NoDQI] in @@ -5158,14 +5155,13 @@ multiclass SS41I_extract64 opc, string OpcodeStr> { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR64:$dst, (extractelt (v2i64 VR128:$src1), imm:$src2))]>, - Sched<[WriteShuffle]>; - let SchedRW = [WriteShuffleLd, WriteRMW] in + Sched<[WriteVecExtract]>; def mr : SS4AIi8; + addr:$dst)]>, Sched<[WriteVecExtractSt]>; } let Predicates = [HasAVX, NoDQI] in @@ -5182,14 +5178,13 @@ multiclass SS41I_extractf32 opc, string OpcodeStr> { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>, - Sched<[WriteFBlend]>; - let SchedRW = [WriteFBlendLd, WriteRMW] in + Sched<[WriteVecExtract]>; def mr : SS4AIi8; + addr:$dst)]>, Sched<[WriteVecExtractSt]>; } let ExeDomain = SSEPackedSingle in { @@ -5223,7 +5218,7 @@ multiclass SS41I_insert8 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>, - Sched<[WriteShuffle]>; + Sched<[WriteVecInsert]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), - imm:$src3))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; + imm:$src3))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>; } let Predicates = [HasAVX, NoBWI] in @@ -5249,7 +5244,7 @@ multiclass SS41I_insert32 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>, - Sched<[WriteShuffle]>; + Sched<[WriteVecInsert]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), - imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; + imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>; } let Predicates = [HasAVX, NoDQI] in @@ -5275,7 +5270,7 @@ multiclass SS41I_insert64 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>, - Sched<[WriteShuffle]>; + Sched<[WriteVecInsert]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), - imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; + imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>; } let Predicates = [HasAVX, NoDQI] in diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 2f77e079d8a8..496a588013c5 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// def BroadwellModel : SchedMachineModel { - // All x86 instructions are modeled as a single micro-op, and HW can decode 4 + // All x86 instructions are modeled as a single micro-op, and BW can decode 4 // instructions per cycle. let IssueWidth = 4; let MicroOpBufferSize = 192; // Based on the reorder buffer. @@ -190,6 +190,26 @@ defm : BWWriteResPair; // Vector variab defm : BWWriteResPair; // Vector MPSAD. defm : BWWriteResPair; // Vector PSADBW. +// Vector insert/extract operations. +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 6; + let NumMicroOps = 2; +} + +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; +} +def : WriteRes { + let Latency = 2; + let NumMicroOps = 3; +} + // Conversion between integer and float. defm : BWWriteResPair; // Float -> Integer. defm : BWWriteResPair; // Integer -> Float. @@ -462,17 +482,6 @@ def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm", "(V?)MOVUPD(Y?)mr", "(V?)MOVUPS(Y?)mr")>; -def BWWriteResGroup11 : SchedWriteRes<[BWPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[BWWriteResGroup11], (instregex "MMX_PINSRWrr", - "(V?)PINSRBrr", - "(V?)PINSRDrr", - "(V?)PINSRQrr", - "(V?)PINSRWrr")>; - def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> { let Latency = 2; let NumMicroOps = 2; @@ -505,15 +514,9 @@ def BWWriteResGroup15 : SchedWriteRes<[BWPort0,BWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup15], (instregex "MMX_PEXTRWrr", - "VCVTPH2PS(Y?)rr", +def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PS(Y?)rr", "(V?)CVTPS2PDrr", "(V?)CVTSS2SDrr", - "(V?)EXTRACTPSrr", - "(V?)PEXTRBrr", - "(V?)PEXTRDrr", - "(V?)PEXTRQrr", - "(V?)PEXTRWrr", "(V?)PSLLDrr", "(V?)PSLLQrr", "(V?)PSLLWrr", @@ -573,17 +576,6 @@ def: InstRW<[BWWriteResGroup20], (instregex "ADC8i8", "SBB8ri", "SET(A|BE)r")>; -def BWWriteResGroup21 : SchedWriteRes<[BWPort4,BWPort5,BWPort237]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup21], (instregex "(V?)EXTRACTPSmr", - "(V?)PEXTRBmr", - "(V?)PEXTRDmr", - "(V?)PEXTRQmr", - "(V?)PEXTRWmr")>; - def BWWriteResGroup22 : SchedWriteRes<[BWPort4,BWPort6,BWPort237]> { let Latency = 2; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 5ab18d344d68..296eafa09dfa 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -189,6 +189,26 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +// Vector insert/extract operations. +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 6; + let NumMicroOps = 2; +} + +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; +} +def : WriteRes { + let Latency = 2; + let NumMicroOps = 3; +} + // String instructions. // Packed Compare Implicit Length Strings, Return Mask @@ -1092,17 +1112,6 @@ def HWWriteResGroup19 : SchedWriteRes<[HWPort237,HWPort0156]> { } def: InstRW<[HWWriteResGroup19], (instregex "SFENCE")>; -def HWWriteResGroup20 : SchedWriteRes<[HWPort4,HWPort5,HWPort237]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup20], (instregex "(V?)EXTRACTPSmr", - "(V?)PEXTRBmr", - "(V?)PEXTRDmr", - "(V?)PEXTRQmr", - "(V?)PEXTRWmr")>; - def HWWriteResGroup21 : SchedWriteRes<[HWPort4,HWPort6,HWPort237]> { let Latency = 2; let NumMicroOps = 3; @@ -1160,17 +1169,6 @@ def HWWriteResGroup26 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> { def: InstRW<[HWWriteResGroup26], (instregex "POP(16|32|64)rmm", "PUSH(16|32|64)rmm")>; -def HWWriteResGroup27 : SchedWriteRes<[HWPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[HWWriteResGroup27], (instregex "MMX_PINSRWrr", - "(V?)PINSRBrr", - "(V?)PINSRDrr", - "(V?)PINSRQrr", - "(V?)PINSRWrr")>; - def HWWriteResGroup28 : SchedWriteRes<[HWPort01]> { let Latency = 2; let NumMicroOps = 2; @@ -1203,16 +1201,10 @@ def HWWriteResGroup31 : SchedWriteRes<[HWPort0,HWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup31], (instregex "MMX_PEXTRWrr", - "VCVTPH2PSYrr", +def: InstRW<[HWWriteResGroup31], (instregex "VCVTPH2PSYrr", "VCVTPH2PSrr", "(V?)CVTPS2PDrr", "(V?)CVTSS2SDrr", - "(V?)EXTRACTPSrr", - "(V?)PEXTRBrr", - "(V?)PEXTRDrr", - "(V?)PEXTRQrr", - "(V?)PEXTRWrr", "(V?)PSLLDrr", "(V?)PSLLQrr", "(V?)PSLLWrr", diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index eca97110bcc1..03b6f87a2ea2 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -173,6 +173,25 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +// Vector insert/extract operations. +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; +} +def : WriteRes { + let Latency = 7; + let NumMicroOps = 2; +} + +def : WriteRes { + let Latency = 3; + let NumMicroOps = 2; +} +def : WriteRes { + let Latency = 5; + let NumMicroOps = 3; +} + //////////////////////////////////////////////////////////////////////////////// // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// @@ -535,16 +554,6 @@ def SBWriteResGroup16_1 : SchedWriteRes<[SBPort1]> { } def: InstRW<[SBWriteResGroup16_1], (instrs BSWAP32r)>; -def SBWriteResGroup17 : SchedWriteRes<[SBPort5,SBPort15]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup17], (instregex "(V?)PINSRBrr", - "(V?)PINSRDrr", - "(V?)PINSRQrr", - "(V?)PINSRWrr")>; - def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> { let Latency = 2; let NumMicroOps = 2; @@ -590,16 +599,6 @@ def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> { } def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>; -def SBWriteResGroup23 : SchedWriteRes<[SBPort0,SBPort15]> { - let Latency = 3; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup23], (instregex "(V?)PEXTRBrr", - "(V?)PEXTRDrr", - "(V?)PEXTRQrr", - "(V?)PEXTRWrr")>; - def SBWriteResGroup23_2 : SchedWriteRes<[SBPort05]> { let Latency = 3; let NumMicroOps = 3; @@ -793,15 +792,6 @@ def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> { def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPD(Y?)mr", "VMASKMOVPS(Y?)mr")>; -def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup39], (instregex "(V?)PEXTRBmr", - "VPEXTRDmr", - "VPEXTRWmr")>; - def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { let Latency = 5; let NumMicroOps = 3; @@ -1009,10 +999,6 @@ def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm", "(V?)PCMPGTBrm", "(V?)PCMPGTDrm", "(V?)PCMPGTWrm", - "(V?)PINSRBrm", - "(V?)PINSRDrm", - "(V?)PINSRQrm", - "(V?)PINSRWrm", "(V?)PMAXSBrm", "(V?)PMAXSDrm", "(V?)PMAXSWrm", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 69828340ae60..08edc433a9ed 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -187,6 +187,26 @@ defm : SKLWriteResPair; // Vector va defm : SKLWriteResPair; // Vector MPSAD. defm : SKLWriteResPair; // Vector PSADBW. +// Vector insert/extract operations. +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 6; + let NumMicroOps = 2; +} + +def : WriteRes { + let Latency = 3; + let NumMicroOps = 2; +} +def : WriteRes { + let Latency = 2; + let NumMicroOps = 3; +} + // Conversion between integer and float. defm : SKLWriteResPair; // Float -> Integer. defm : SKLWriteResPair; // Integer -> Float. @@ -571,12 +591,7 @@ def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> { let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr", - "MMX_PINSRWrr", - "(V?)PINSRBrr", - "(V?)PINSRDrr", - "(V?)PINSRQrr", - "(V?)PINSRWrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>; def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> { let Latency = 2; @@ -671,17 +686,6 @@ def: InstRW<[SKLWriteResGroup23], (instregex "ADC8i8", "SBB8i8", "SBB8ri")>; -def SKLWriteResGroup24 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup24], (instregex "(V?)EXTRACTPSmr", - "(V?)PEXTRBmr", - "(V?)PEXTRDmr", - "(V?)PEXTRQmr", - "(V?)PEXTRWmr")>; - def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> { let Latency = 2; let NumMicroOps = 3; @@ -761,13 +765,7 @@ def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0,SKLPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup31], (instregex "MMX_PEXTRWrr", - "(V?)EXTRACTPSrr", - "(V?)PEXTRBrr", - "(V?)PEXTRDrr", - "(V?)PEXTRQrr", - "(V?)PEXTRWrr", - "(V?)PTEST(Y?)rr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "(V?)PTEST(Y?)rr")>; def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> { let Latency = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 14982936ffa6..afe255667e7e 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -187,6 +187,26 @@ defm : SKXWriteResPair; // Vector var defm : SKXWriteResPair; // Vector MPSAD. defm : SKXWriteResPair; // Vector PSADBW. +// Vector insert/extract operations. +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 6; + let NumMicroOps = 2; +} + +def : WriteRes { + let Latency = 3; + let NumMicroOps = 2; +} +def : WriteRes { + let Latency = 2; + let NumMicroOps = 3; +} + // Conversion between integer and float. defm : SKXWriteResPair; // Float -> Integer. defm : SKXWriteResPair; // Integer -> Float. @@ -1035,20 +1055,7 @@ def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> { let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKXWriteResGroup13], (instregex "MMX_MOVQ2DQrr", - "MMX_PINSRWrr", - "PINSRBrr", - "PINSRDrr", - "PINSRQrr", - "PINSRWrr", - "VPINSRBZrr", - "VPINSRBrr", - "VPINSRDZrr", - "VPINSRDrr", - "VPINSRQZrr", - "VPINSRQrr", - "VPINSRWZrr", - "VPINSRWrr")>; +def: InstRW<[SKXWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>; def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> { let Latency = 2; @@ -1163,27 +1170,6 @@ def: InstRW<[SKXWriteResGroup23], (instregex "ADC8i8", "SBB8i8", "SBB8ri")>; -def SKXWriteResGroup24 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKXWriteResGroup24], (instregex "EXTRACTPSmr", - "PEXTRBmr", - "PEXTRDmr", - "PEXTRQmr", - "PEXTRWmr", - "VEXTRACTPSZmr(b?)", - "VEXTRACTPSmr", - "VPEXTRBZmr(b?)", - "VPEXTRBmr", - "VPEXTRDZmr(b?)", - "VPEXTRDmr", - "VPEXTRQZmr(b?)", - "VPEXTRQmr", - "VPEXTRWZmr(b?)", - "VPEXTRWmr")>; - def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> { let Latency = 2; let NumMicroOps = 3; @@ -1455,25 +1441,7 @@ def SKXWriteResGroup33 : SchedWriteRes<[SKXPort0,SKXPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup33], (instregex "EXTRACTPSrr", - "MMX_PEXTRWrr", - "PEXTRBrr", - "PEXTRDrr", - "PEXTRQrr", - "PEXTRWrr", - "PTESTrr", - "VEXTRACTPSZrr", - "VEXTRACTPSrr", - "VPEXTRBZrr", - "VPEXTRBrr", - "VPEXTRDZrr", - "VPEXTRDrr", - "VPEXTRQZrr", - "VPEXTRQrr", - "VPEXTRWZrr", - "VPEXTRWrr", - "VPTESTYrr", - "VPTESTrr")>; +def: InstRW<[SKXWriteResGroup33], (instregex "(V?)PTEST(Y?)rr")>; def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> { let Latency = 3; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 15243f90780d..4f5c9e2b1b9c 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -117,6 +117,11 @@ defm WriteVarBlend : X86SchedWritePair; // Vector variable blends. defm WritePSADBW : X86SchedWritePair; // Vector PSADBW. defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. +// Vector insert/extract operations. +defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element. +def WriteVecExtract : SchedWrite; // Extract vector element to gpr. +def WriteVecExtractSt : SchedWrite; // Extract vector element and store. + // MOVMSK operations. def WriteFMOVMSK : SchedWrite; def WriteVecMOVMSK : SchedWrite; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 359a2858c1d4..d4e704fa2c97 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -251,6 +251,14 @@ defm : AtomWriteResPair; // NOTE: defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +//////////////////////////////////////////////////////////////////////////////// +// Vector insert/extract operations. +//////////////////////////////////////////////////////////////////////////////// + +defm : AtomWriteResPair; +def : WriteRes; +def : WriteRes; + //////////////////////////////////////////////////////////////////////////////// // SSE42 String instructions. //////////////////////////////////////////////////////////////////////////////// diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 2f2cca31437e..acd21518628e 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -385,23 +385,12 @@ defm : JWriteResFpuPair; // NOTE: Doesn' defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. //////////////////////////////////////////////////////////////////////////////// -// Vector Extraction instructions. +// Vector insert/extract operations. //////////////////////////////////////////////////////////////////////////////// -def JWritePEXTR : SchedWriteRes<[JFPU0, JFPA, JALU0]> { let Latency = 3; } -def : InstRW<[JWritePEXTR], (instrs MMX_PEXTRWrr, - EXTRACTPSrr, VEXTRACTPSrr, - PEXTRBrr, VPEXTRBrr, - PEXTRDrr, VPEXTRDrr, - PEXTRQrr, VPEXTRQrr, - PEXTRWrr, VPEXTRWrr, PEXTRWrr_REV, VPEXTRWrr_REV)>; - -def JWritePEXTRSt : SchedWriteRes<[JFPU1, JSTC, JSAGU]> { let Latency = 3; } -def : InstRW<[JWritePEXTRSt], (instrs EXTRACTPSmr, VEXTRACTPSmr, - PEXTRBmr, VPEXTRBmr, - PEXTRDmr, VPEXTRDmr, - PEXTRQmr, VPEXTRQmr, - PEXTRWmr, VPEXTRWmr)>; +defm : JWriteResFpuPair; +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } //////////////////////////////////////////////////////////////////////////////// // SSE42 String instructions. diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 73c868086102..3831b5e80707 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -164,6 +164,16 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +// Vector insert/extract operations. +defm : SLMWriteResPair; + +def : WriteRes; +def : WriteRes { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1, 2]; +} + //////////////////////////////////////////////////////////////////////////////// // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index daf0dfffd491..765f53834fb7 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -233,6 +233,19 @@ defm : ZnWriteResFpuPair; // Vector Shift Operations defm : ZnWriteResFpuPair; +// Vector insert/extract operations. +defm : ZnWriteResFpuPair; + +def : WriteRes { + let Latency = 2; + let ResourceCycles = [1, 2]; +} +def : WriteRes { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1, 2, 3]; +} + // MOVMSK Instructions. def : WriteRes; def : WriteRes; @@ -987,22 +1000,6 @@ def ZnWritePMOVMSKBY : SchedWriteRes<[ZnFPU2]> { } def : InstRW<[ZnWritePMOVMSKBY], (instregex "(V|MMX_)?PMOVMSKBYrr")>; -// PEXTR B/W/D/Q. -// r32,x,i. -def ZnWritePEXTRr : SchedWriteRes<[ZnFPU12, ZnFPU2]> { - let Latency = 2; - let ResourceCycles = [1, 2]; -} -def : InstRW<[ZnWritePEXTRr], (instregex "(V?)PEXTR(B|W|D|Q)rr", "MMX_PEXTRWrr")>; - -def ZnWritePEXTRm : SchedWriteRes<[ZnAGU, ZnFPU12, ZnFPU2]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1, 2, 3]; -} -// m8,x,i. -def : InstRW<[ZnWritePEXTRm], (instregex "(V?)PEXTR(B|W|D|Q)mr")>; - // VPBROADCAST B/W. // x, m8/16. def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> { diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll index 5fa5a15cf6d7..c08dc0e11d98 100644 --- a/llvm/test/CodeGen/X86/mmx-schedule.ll +++ b/llvm/test/CodeGen/X86/mmx-schedule.ll @@ -2978,7 +2978,7 @@ declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone define i32 @test_pextrw(x86_mmx %a0) optsize { ; GENERIC-LABEL: test_pextrw: ; GENERIC: # %bb.0: -; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00] +; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pextrw: @@ -2993,7 +2993,7 @@ define i32 @test_pextrw(x86_mmx %a0) optsize { ; ; SANDY-LABEL: test_pextrw: ; SANDY: # %bb.0: -; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00] +; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrw: @@ -3501,9 +3501,9 @@ declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize { ; GENERIC-LABEL: test_pinsrw: ; GENERIC: # %bb.0: -; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00] ; GENERIC-NEXT: movswl (%rsi), %eax # sched: [5:0.50] -; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3525,9 +3525,9 @@ define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize { ; ; SANDY-LABEL: test_pinsrw: ; SANDY: # %bb.0: -; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] +; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00] ; SANDY-NEXT: movswl (%rsi), %eax # sched: [5:0.50] -; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] +; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll index 96da898e3967..50a4253ed023 100644 --- a/llvm/test/CodeGen/X86/sse41-schedule.ll +++ b/llvm/test/CodeGen/X86/sse41-schedule.ll @@ -1903,7 +1903,7 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { ; GENERIC-LABEL: test_pextrw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] -; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00] +; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pextrw: @@ -1915,7 +1915,7 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { ; SANDY-SSE-LABEL: test_pextrw: ; SANDY-SSE: # %bb.0: ; SANDY-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] -; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00] +; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_pextrw: diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s index fdee64ab83db..46e1b47fc81e 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s @@ -268,9 +268,9 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 8 1.00 * pavgb (%rax), %mm2 # CHECK-NEXT: 1 3 1.00 pavgw %mm0, %mm2 # CHECK-NEXT: 2 8 1.00 * pavgw (%rax), %mm2 -# CHECK-NEXT: 1 1 1.00 pextrw $1, %mm0, %ecx -# CHECK-NEXT: 1 1 1.00 pinsrw $1, %eax, %mm2 -# CHECK-NEXT: 2 6 1.00 * pinsrw $1, (%rax), %mm2 +# CHECK-NEXT: 2 3 1.00 pextrw $1, %mm0, %ecx +# CHECK-NEXT: 2 2 1.00 pinsrw $1, %eax, %mm2 +# CHECK-NEXT: 2 7 0.50 * pinsrw $1, (%rax), %mm2 # CHECK-NEXT: 1 3 1.00 pmaxsw %mm0, %mm2 # CHECK-NEXT: 2 8 1.00 * pmaxsw (%rax), %mm2 # CHECK-NEXT: 1 3 1.00 pmaxub %mm0, %mm2 @@ -331,7 +331,7 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 112.00 40.00 54.00 10.00 35.00 33.50 33.50 +# CHECK-NEXT: - 112.00 41.00 55.50 10.00 34.50 33.50 33.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -409,9 +409,9 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgb (%rax), %mm2 # CHECK-NEXT: - - - 1.00 - - - - pavgw %mm0, %mm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgw (%rax), %mm2 -# CHECK-NEXT: - - - - - 1.00 - - pextrw $1, %mm0, %ecx -# CHECK-NEXT: - - - - - 1.00 - - pinsrw $1, %eax, %mm2 -# CHECK-NEXT: - - - - - 1.00 0.50 0.50 pinsrw $1, (%rax), %mm2 +# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrw $1, %mm0, %ecx +# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrw $1, %eax, %mm2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pinsrw $1, (%rax), %mm2 # CHECK-NEXT: - - - 1.00 - - - - pmaxsw %mm0, %mm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pmaxsw (%rax), %mm2 # CHECK-NEXT: - - - 1.00 - - - - pmaxub %mm0, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse41.s index 7f40244f26a7..76263ed3d15d 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse41.s @@ -188,7 +188,7 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 4 5 1.00 * pextrd $1, %xmm0, (%rax) # CHECK-NEXT: 2 3 1.00 pextrq $1, %xmm0, %rcx # CHECK-NEXT: 4 5 1.00 * pextrq $1, %xmm0, (%rax) -# CHECK-NEXT: 3 6 1.00 * pextrw $1, %xmm0, (%rax) +# CHECK-NEXT: 3 5 1.00 * pextrw $1, %xmm0, (%rax) # CHECK-NEXT: 1 5 1.00 phminposuw %xmm0, %xmm2 # CHECK-NEXT: 2 11 1.00 * phminposuw (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 pinsrb $1, %eax, %xmm1 @@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 26.00 47.00 5.00 53.00 25.00 25.00 +# CHECK-NEXT: - - 26.00 47.50 5.00 52.50 24.50 24.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -301,7 +301,7 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 pextrd $1, %xmm0, (%rax) # CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrq $1, %xmm0, %rcx # CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 pextrq $1, %xmm0, (%rax) -# CHECK-NEXT: - - - - 1.00 1.00 1.00 1.00 pextrw $1, %xmm0, (%rax) +# CHECK-NEXT: - - - 0.50 1.00 0.50 0.50 0.50 pextrw $1, %xmm0, (%rax) # CHECK-NEXT: - - 1.00 - - - - - phminposuw %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 phminposuw (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrb $1, %eax, %xmm1