forked from OSchip/llvm-project
[X86][SLM] Fix HADD/HSUB uops, latency and throughput
Noticed while trying to improve the generic reduction costs using the D103695 helper script. The new values were confirmed against the Intel optimization manual (AoM), Agner Fog's instruction tables, and InstLatX64 measurements.
This commit is contained in:
parent
51d04e2268
commit
484944ac3b
|
@ -420,12 +420,12 @@ def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
|
|||
// Horizontal add/sub instructions.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 6, [6], 4>;
|
||||
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 6, [6], 4>;
|
||||
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV1], 6, [6], 4, 1>;
|
||||
defm : X86WriteResPairUnsupported<WriteFHAddY>;
|
||||
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
|
||||
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 6, [6], 3, 1>;
|
||||
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 6, [6], 3, 1>;
|
||||
defm : X86WriteResPairUnsupported<WritePHAddY>;
|
||||
defm : X86WriteResPairUnsupported<WritePHAddZ>;
|
||||
|
||||
// String instructions.
|
||||
|
|
|
@ -47,14 +47,14 @@ mwait
|
|||
# CHECK-NEXT: 1 7 2.00 * addsubpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * addsubps (%rax), %xmm2
|
||||
# CHECK-NEXT: 4 6 3.00 haddpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 4 9 3.00 * haddpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 4 6 3.00 haddps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 4 9 3.00 * haddps (%rax), %xmm2
|
||||
# CHECK-NEXT: 4 6 3.00 hsubpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 4 9 3.00 * hsubpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 4 6 3.00 hsubps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 4 9 3.00 * hsubps (%rax), %xmm2
|
||||
# CHECK-NEXT: 4 6 6.00 haddpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 5 9 6.00 * haddpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 4 6 6.00 haddps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 5 9 6.00 * haddps (%rax), %xmm2
|
||||
# CHECK-NEXT: 4 6 6.00 hsubpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 5 9 6.00 * hsubpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 4 6 6.00 hsubps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 5 9 6.00 * hsubps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 * lddqu (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 100 1.00 U monitor
|
||||
# CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2
|
||||
|
@ -77,7 +77,7 @@ mwait
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
|
||||
# CHECK-NEXT: - - - 32.00 30.00 - - 10.00
|
||||
# CHECK-NEXT: - - - 8.00 54.00 - - 10.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
|
||||
|
@ -85,14 +85,14 @@ mwait
|
|||
# CHECK-NEXT: - - - - 2.00 - - 1.00 addsubpd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - 1.00 - - - addsubps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - 1.00 - - 1.00 addsubps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - haddpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 haddpd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - haddps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 haddps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - hsubpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 hsubpd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - hsubps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 hsubps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - 6.00 - - - haddpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - 6.00 - - 1.00 haddpd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - 6.00 - - - haddps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - 6.00 - - 1.00 haddps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - 6.00 - - - hsubpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - 6.00 - - 1.00 hsubpd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - 6.00 - - - hsubps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - 6.00 - - 1.00 hsubps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - 1.00 lddqu (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - - monitor
|
||||
# CHECK-NEXT: - - - 1.00 - - - - movddup %xmm0, %xmm2
|
||||
|
|
|
@ -122,30 +122,30 @@ psignw (%rax), %xmm2
|
|||
# CHECK-NEXT: 1 4 1.00 * palignr $1, (%rax), %mm2
|
||||
# CHECK-NEXT: 1 1 1.00 palignr $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 * palignr $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 phaddd %mm0, %mm2
|
||||
# CHECK-NEXT: 1 4 1.00 * phaddd (%rax), %mm2
|
||||
# CHECK-NEXT: 1 1 0.50 phaddd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 * phaddd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 phaddsw %mm0, %mm2
|
||||
# CHECK-NEXT: 1 4 1.00 * phaddsw (%rax), %mm2
|
||||
# CHECK-NEXT: 1 1 0.50 phaddsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 * phaddsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 phaddw %mm0, %mm2
|
||||
# CHECK-NEXT: 1 4 1.00 * phaddw (%rax), %mm2
|
||||
# CHECK-NEXT: 1 1 0.50 phaddw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 * phaddw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 phsubd %mm0, %mm2
|
||||
# CHECK-NEXT: 1 4 1.00 * phsubd (%rax), %mm2
|
||||
# CHECK-NEXT: 1 1 0.50 phsubd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 * phsubd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 phsubsw %mm0, %mm2
|
||||
# CHECK-NEXT: 1 4 1.00 * phsubsw (%rax), %mm2
|
||||
# CHECK-NEXT: 1 1 0.50 phsubsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 * phsubsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 phsubw %mm0, %mm2
|
||||
# CHECK-NEXT: 1 4 1.00 * phsubw (%rax), %mm2
|
||||
# CHECK-NEXT: 1 1 0.50 phsubw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 * phsubw (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 6 3.00 phaddd %mm0, %mm2
|
||||
# CHECK-NEXT: 4 9 3.00 * phaddd (%rax), %mm2
|
||||
# CHECK-NEXT: 3 6 3.00 phaddd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 4 9 3.00 * phaddd (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 6 3.00 phaddsw %mm0, %mm2
|
||||
# CHECK-NEXT: 4 9 3.00 * phaddsw (%rax), %mm2
|
||||
# CHECK-NEXT: 3 6 3.00 phaddsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 4 9 3.00 * phaddsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 6 3.00 phaddw %mm0, %mm2
|
||||
# CHECK-NEXT: 4 9 3.00 * phaddw (%rax), %mm2
|
||||
# CHECK-NEXT: 3 6 3.00 phaddw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 4 9 3.00 * phaddw (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 6 3.00 phsubd %mm0, %mm2
|
||||
# CHECK-NEXT: 4 9 3.00 * phsubd (%rax), %mm2
|
||||
# CHECK-NEXT: 3 6 3.00 phsubd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 4 9 3.00 * phsubd (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 6 3.00 phsubsw %mm0, %mm2
|
||||
# CHECK-NEXT: 4 9 3.00 * phsubsw (%rax), %mm2
|
||||
# CHECK-NEXT: 3 6 3.00 phsubsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 4 9 3.00 * phsubsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 6 3.00 phsubw %mm0, %mm2
|
||||
# CHECK-NEXT: 4 9 3.00 * phsubw (%rax), %mm2
|
||||
# CHECK-NEXT: 3 6 3.00 phsubw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 4 9 3.00 * phsubw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2
|
||||
# CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %mm2
|
||||
# CHECK-NEXT: 1 5 2.00 pmaddubsw %xmm0, %xmm2
|
||||
|
@ -183,7 +183,7 @@ psignw (%rax), %xmm2
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
|
||||
# CHECK-NEXT: - - - 52.00 24.00 - - 32.00
|
||||
# CHECK-NEXT: - - - 112.00 84.00 - - 32.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
|
||||
|
@ -203,30 +203,30 @@ psignw (%rax), %xmm2
|
|||
# CHECK-NEXT: - - - 1.00 - - - 1.00 palignr $1, (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - - palignr $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - 1.00 palignr $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - phaddd %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddd (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - phaddd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - phaddsw %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddsw (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - phaddsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddsw (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - phaddw %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddw (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - phaddw %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddw (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - phsubd %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubd (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - phsubd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - phsubsw %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubsw (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - phsubsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubsw (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - phsubw %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubw (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - phsubw %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubw (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - phaddd %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddd (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - phaddd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - phaddsw %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddsw (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - phaddsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddsw (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - phaddw %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddw (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - phaddw %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddw (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - phsubd %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubd (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - phsubd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - phsubsw %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubsw (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - phsubsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubsw (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - phsubw %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubw (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - - phsubw %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubw (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - - pmaddubsw %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmaddubsw (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 2.00 - - - - pmaddubsw %xmm0, %xmm2
|
||||
|
|
Loading…
Reference in New Issue