From d23b6831deb70a7a3cce0d35b25dcfeaad206127 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 20 Jun 2018 07:01:14 +0000 Subject: [PATCH] [X86][Znver1] Specify Register Files, RCU; FP scheduler capacity. Summary: First off: i do not have any access to that processor, so this is purely theoretical, no benchmarks. I have been looking into b**d**ver2 scheduling profile, and while cross-referencing the existing b**t**ver2, znver1 profiles, and the reference docs (`Software Optimization Guide for AMD Family {15,16,17}h Processors`), i have noticed that only b**t**ver2 scheduling profile specifies these. Also, there is no mca test coverage. Reviewers: RKSimon, craig.topper, courbet, GGanesh, andreadb Reviewed By: GGanesh Subscribers: gbedwell, vprasad, ddibyend, shivaram, Ashutosh, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D47676 llvm-svn: 335099 --- llvm/lib/Target/X86/X86ScheduleZnver1.td | 27 ++++++++++++++++++- .../llvm-mca/X86/register-file-statistics.s | 10 +++++++ .../llvm-mca/X86/scheduler-queue-usage.s | 1 + 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index ec7c861efa64..cf5fe6521634 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -55,7 +55,6 @@ def ZnFPU2 : ProcResource<1>; def ZnFPU3 : ProcResource<1>; // FPU grouping -def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]>; def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>; def ZnFPU01 : ProcResGroup<[ZnFPU0, ZnFPU1]>; def ZnFPU12 : ProcResGroup<[ZnFPU1, ZnFPU2]>; @@ -91,6 +90,32 @@ def ZnDivider : ProcResource<1>; // 4 Cycles load-to use Latency is captured def : ReadAdvance; +// The Integer PRF for Zen is 168 entries, and it holds the architectural and +// speculative version of the 64-bit integer registers. 
+// Reference: "Software Optimization Guide for AMD Family 17h Processors" +def ZnIntegerPRF : RegisterFile<168, [GR8, GR16, GR32, GR64, CCR]>; + +// 36 Entry (9x4 entries) floating-point Scheduler +def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]> { +let BufferSize=36; +} + +// The Zen FP Retire Queue renames SIMD and FP uOps onto a pool of 160 128-bit +// registers. Operations on 256-bit data types are cracked into two COPs. +// Reference: "Software Optimization Guide for AMD Family 17h Processors" +def ZnFpuPRF: RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>; + +// The unit can track up to 192 macro ops in-flight. +// The retire unit handles in-order commit of up to 8 macro ops per cycle. +// Reference: "Software Optimization Guide for AMD Family 17h Processors" +// To be noted, the retire unit is shared between integer and FP ops. +// In SMT mode it is 96 entries per thread. But, we do not use the conservative +// value here because there is currently no way to fully model the SMT mode, +// so there is no point in trying. +def ZnRCU : RetireControlUnit<192, 8>; + +// FIXME: there are 72 read buffers and 44 write buffers. 
+ // (a folded load is an instruction that loads and does some operation) // Ex: ADDPD xmm,[mem]-> This instruction has two micro-ops // Instructions with folded loads are usually micro-fused, so they only appear diff --git a/llvm/test/tools/llvm-mca/X86/register-file-statistics.s b/llvm/test/tools/llvm-mca/X86/register-file-statistics.s index c35edf5af55c..858da0e3a4f5 100644 --- a/llvm/test/tools/llvm-mca/X86/register-file-statistics.s +++ b/llvm/test/tools/llvm-mca/X86/register-file-statistics.s @@ -21,7 +21,17 @@ xor %eax, %ebx # BTVER2-NEXT: Total number of mappings created: 0 # BTVER2-NEXT: Max number of mappings used: 0 +# ZNVER1: * Register File #1 -- ZnFpuPRF: +# ZNVER1-NEXT: Number of physical registers: 160 +# ZNVER1-NEXT: Total number of mappings created: 0 +# ZNVER1-NEXT: Max number of mappings used: 0 + # BTVER2: * Register File #2 -- JIntegerPRF: # BTVER2-NEXT: Number of physical registers: 64 # BTVER2-NEXT: Total number of mappings created: 2 # BTVER2-NEXT: Max number of mappings used: 2 + +# ZNVER1: * Register File #2 -- ZnIntegerPRF: +# ZNVER1-NEXT: Number of physical registers: 168 +# ZNVER1-NEXT: Total number of mappings created: 2 +# ZNVER1-NEXT: Max number of mappings used: 2 diff --git a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s index 4cf9e6a02308..8448960c67e7 100644 --- a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s @@ -49,3 +49,4 @@ xor %eax, %ebx # ZNVER1: Scheduler's queue usage: # ZNVER1-NEXT: ZnAGU, 0/28 # ZNVER1-NEXT: ZnALU, 1/56 +# ZNVER1-NEXT: ZnFPU, 0/36