forked from OSchip/llvm-project
AMDGPU: Add num spilled s/vgprs to metadata
This was requested by tools. Differential Revision: https://reviews.llvm.org/D40321 llvm-svn: 319192
This commit is contained in:
parent
5011298958
commit
06ae4ec78e
|
@ -1295,6 +1295,16 @@ non-AMD key names should be prefixed by "*vendor-name*.".
|
|||
code is capable of
|
||||
supporting XNACK. See
|
||||
:ref:`amdgpu-target-features`.
|
||||
"NumSpilledSGPRs" integer Number of stores from
|
||||
a scalar register to
|
||||
a register allocator
|
||||
created spill
|
||||
location.
|
||||
"NumSpilledVGPRs" integer Number of stores from
|
||||
a vector register to
|
||||
a register allocator
|
||||
created spill
|
||||
location.
|
||||
============================ ============== ========= =====================
|
||||
|
||||
..
|
||||
|
|
|
@ -244,6 +244,10 @@ constexpr char MaxFlatWorkGroupSize[] = "MaxFlatWorkGroupSize";
|
|||
constexpr char IsDynamicCallStack[] = "IsDynamicCallStack";
|
||||
/// \brief Key for Kernel::CodeProps::Metadata::mIsXNACKEnabled.
|
||||
constexpr char IsXNACKEnabled[] = "IsXNACKEnabled";
|
||||
/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledSGPRs.
|
||||
constexpr char NumSpilledSGPRs[] = "NumSpilledSGPRs";
|
||||
/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledVGPRs.
|
||||
constexpr char NumSpilledVGPRs[] = "NumSpilledVGPRs";
|
||||
} // end namespace Key
|
||||
|
||||
/// \brief In-memory representation of kernel code properties metadata.
|
||||
|
@ -275,6 +279,10 @@ struct Metadata final {
|
|||
/// \brief True if the generated machine code is capable of supporting XNACK.
|
||||
/// Optional.
|
||||
bool mIsXNACKEnabled = false;
|
||||
/// \brief Number of stores from an SGPR to a spill location. Optional.
|
||||
uint16_t mNumSpilledSGPRs = 0;
|
||||
/// \brief Number of stores from a VGPR to a spill location. Optional.
|
||||
uint16_t mNumSpilledVGPRs = 0;
|
||||
|
||||
/// \brief Default constructor.
|
||||
Metadata() = default;
|
||||
|
|
|
@ -148,6 +148,10 @@ struct MappingTraits<Kernel::CodeProps::Metadata> {
|
|||
MD.mIsDynamicCallStack, false);
|
||||
YIO.mapOptional(Kernel::CodeProps::Key::IsXNACKEnabled,
|
||||
MD.mIsXNACKEnabled, false);
|
||||
YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledSGPRs,
|
||||
MD.mNumSpilledSGPRs, uint16_t(0));
|
||||
YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledVGPRs,
|
||||
MD.mNumSpilledVGPRs, uint16_t(0));
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -1188,6 +1188,8 @@ AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
|
|||
HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
|
||||
HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
|
||||
HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
|
||||
HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
|
||||
HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
|
||||
|
||||
return HSACodeProps;
|
||||
}
|
||||
|
|
|
@ -1,26 +1,26 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
|
||||
|
||||
@var = addrspace(1) global float 0.0
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: Version: [ 1, 0 ]
|
||||
|
||||
; CHECK: Kernels:
|
||||
; CHECK: - Name: test
|
||||
; CHECK: SymbolName: 'test@kd'
|
||||
; CHECK: CodeProps:
|
||||
; CHECK: KernargSegmentSize: 24
|
||||
; CHECK: GroupSegmentFixedSize: 0
|
||||
; CHECK: PrivateSegmentFixedSize: 0
|
||||
; CHECK: KernargSegmentAlign: 8
|
||||
; CHECK: WavefrontSize: 64
|
||||
; GFX700: NumSGPRs: 6
|
||||
; GFX800: NumSGPRs: 96
|
||||
; GFX900: NumSGPRs: 6
|
||||
; GFX700: NumVGPRs: 4
|
||||
; GFX800: NumVGPRs: 6
|
||||
; GFX900: NumVGPRs: 6
|
||||
; CHECK: MaxFlatWorkGroupSize: 256
|
||||
|
||||
; CHECK: - Name: test
|
||||
; CHECK: SymbolName: 'test@kd'
|
||||
; CHECK: CodeProps:
|
||||
; CHECK: KernargSegmentSize: 24
|
||||
; CHECK: GroupSegmentFixedSize: 0
|
||||
; CHECK: PrivateSegmentFixedSize: 0
|
||||
; CHECK: KernargSegmentAlign: 8
|
||||
; CHECK: WavefrontSize: 64
|
||||
; CHECK: NumSGPRs: 6
|
||||
; GFX700: NumVGPRs: 4
|
||||
; GFX803: NumVGPRs: 6
|
||||
; GFX900: NumVGPRs: 6
|
||||
; CHECK: MaxFlatWorkGroupSize: 256
|
||||
define amdgpu_kernel void @test(
|
||||
half addrspace(1)* %r,
|
||||
half addrspace(1)* %a,
|
||||
|
@ -32,3 +32,111 @@ entry:
|
|||
store half %r.val, half addrspace(1)* %r
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: - Name: num_spilled_sgprs
|
||||
; CHECK: SymbolName: 'num_spilled_sgprs@kd'
|
||||
; CHECK: CodeProps:
|
||||
; CHECK: NumSpilledSGPRs: 41
|
||||
define amdgpu_kernel void @num_spilled_sgprs(
|
||||
i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %out2,
|
||||
i32 addrspace(1)* %out3, i32 addrspace(1)* %out4, i32 addrspace(1)* %out5,
|
||||
i32 addrspace(1)* %out6, i32 addrspace(1)* %out7, i32 addrspace(1)* %out8,
|
||||
i32 addrspace(1)* %out9, i32 addrspace(1)* %outa, i32 addrspace(1)* %outb,
|
||||
i32 addrspace(1)* %outc, i32 addrspace(1)* %outd, i32 addrspace(1)* %oute,
|
||||
i32 addrspace(1)* %outf, i32 %in0, i32 %in1, i32 %in2, i32 %in3, i32 %in4,
|
||||
i32 %in5, i32 %in6, i32 %in7, i32 %in8, i32 %in9, i32 %ina, i32 %inb,
|
||||
i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 {
|
||||
entry:
|
||||
store i32 %in0, i32 addrspace(1)* %out0
|
||||
store i32 %in1, i32 addrspace(1)* %out1
|
||||
store i32 %in2, i32 addrspace(1)* %out2
|
||||
store i32 %in3, i32 addrspace(1)* %out3
|
||||
store i32 %in4, i32 addrspace(1)* %out4
|
||||
store i32 %in5, i32 addrspace(1)* %out5
|
||||
store i32 %in6, i32 addrspace(1)* %out6
|
||||
store i32 %in7, i32 addrspace(1)* %out7
|
||||
store i32 %in8, i32 addrspace(1)* %out8
|
||||
store i32 %in9, i32 addrspace(1)* %out9
|
||||
store i32 %ina, i32 addrspace(1)* %outa
|
||||
store i32 %inb, i32 addrspace(1)* %outb
|
||||
store i32 %inc, i32 addrspace(1)* %outc
|
||||
store i32 %ind, i32 addrspace(1)* %outd
|
||||
store i32 %ine, i32 addrspace(1)* %oute
|
||||
store i32 %inf, i32 addrspace(1)* %outf
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: - Name: num_spilled_vgprs
|
||||
; CHECK: SymbolName: 'num_spilled_vgprs@kd'
|
||||
; CHECK: CodeProps:
|
||||
; CHECK: NumSpilledVGPRs: 14
|
||||
define amdgpu_kernel void @num_spilled_vgprs() #1 {
|
||||
%val0 = load volatile float, float addrspace(1)* @var
|
||||
%val1 = load volatile float, float addrspace(1)* @var
|
||||
%val2 = load volatile float, float addrspace(1)* @var
|
||||
%val3 = load volatile float, float addrspace(1)* @var
|
||||
%val4 = load volatile float, float addrspace(1)* @var
|
||||
%val5 = load volatile float, float addrspace(1)* @var
|
||||
%val6 = load volatile float, float addrspace(1)* @var
|
||||
%val7 = load volatile float, float addrspace(1)* @var
|
||||
%val8 = load volatile float, float addrspace(1)* @var
|
||||
%val9 = load volatile float, float addrspace(1)* @var
|
||||
%val10 = load volatile float, float addrspace(1)* @var
|
||||
%val11 = load volatile float, float addrspace(1)* @var
|
||||
%val12 = load volatile float, float addrspace(1)* @var
|
||||
%val13 = load volatile float, float addrspace(1)* @var
|
||||
%val14 = load volatile float, float addrspace(1)* @var
|
||||
%val15 = load volatile float, float addrspace(1)* @var
|
||||
%val16 = load volatile float, float addrspace(1)* @var
|
||||
%val17 = load volatile float, float addrspace(1)* @var
|
||||
%val18 = load volatile float, float addrspace(1)* @var
|
||||
%val19 = load volatile float, float addrspace(1)* @var
|
||||
%val20 = load volatile float, float addrspace(1)* @var
|
||||
%val21 = load volatile float, float addrspace(1)* @var
|
||||
%val22 = load volatile float, float addrspace(1)* @var
|
||||
%val23 = load volatile float, float addrspace(1)* @var
|
||||
%val24 = load volatile float, float addrspace(1)* @var
|
||||
%val25 = load volatile float, float addrspace(1)* @var
|
||||
%val26 = load volatile float, float addrspace(1)* @var
|
||||
%val27 = load volatile float, float addrspace(1)* @var
|
||||
%val28 = load volatile float, float addrspace(1)* @var
|
||||
%val29 = load volatile float, float addrspace(1)* @var
|
||||
%val30 = load volatile float, float addrspace(1)* @var
|
||||
|
||||
store volatile float %val0, float addrspace(1)* @var
|
||||
store volatile float %val1, float addrspace(1)* @var
|
||||
store volatile float %val2, float addrspace(1)* @var
|
||||
store volatile float %val3, float addrspace(1)* @var
|
||||
store volatile float %val4, float addrspace(1)* @var
|
||||
store volatile float %val5, float addrspace(1)* @var
|
||||
store volatile float %val6, float addrspace(1)* @var
|
||||
store volatile float %val7, float addrspace(1)* @var
|
||||
store volatile float %val8, float addrspace(1)* @var
|
||||
store volatile float %val9, float addrspace(1)* @var
|
||||
store volatile float %val10, float addrspace(1)* @var
|
||||
store volatile float %val11, float addrspace(1)* @var
|
||||
store volatile float %val12, float addrspace(1)* @var
|
||||
store volatile float %val13, float addrspace(1)* @var
|
||||
store volatile float %val14, float addrspace(1)* @var
|
||||
store volatile float %val15, float addrspace(1)* @var
|
||||
store volatile float %val16, float addrspace(1)* @var
|
||||
store volatile float %val17, float addrspace(1)* @var
|
||||
store volatile float %val18, float addrspace(1)* @var
|
||||
store volatile float %val19, float addrspace(1)* @var
|
||||
store volatile float %val20, float addrspace(1)* @var
|
||||
store volatile float %val21, float addrspace(1)* @var
|
||||
store volatile float %val22, float addrspace(1)* @var
|
||||
store volatile float %val23, float addrspace(1)* @var
|
||||
store volatile float %val24, float addrspace(1)* @var
|
||||
store volatile float %val25, float addrspace(1)* @var
|
||||
store volatile float %val26, float addrspace(1)* @var
|
||||
store volatile float %val27, float addrspace(1)* @var
|
||||
store volatile float %val28, float addrspace(1)* @var
|
||||
store volatile float %val29, float addrspace(1)* @var
|
||||
store volatile float %val30, float addrspace(1)* @var
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-num-sgpr"="14" }
|
||||
attributes #1 = { "amdgpu-num-vgpr"="20" }
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
// CHECK: KernargSegmentAlign: 16
|
||||
// CHECK: WavefrontSize: 64
|
||||
// CHECK: MaxFlatWorkGroupSize: 256
|
||||
// CHECK: NumSpilledSGPRs: 1
|
||||
// CHECK: NumSpilledVGPRs: 1
|
||||
.amd_amdgpu_hsa_metadata
|
||||
Version: [ 1, 0 ]
|
||||
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
|
||||
|
@ -27,4 +29,6 @@
|
|||
KernargSegmentAlign: 16
|
||||
WavefrontSize: 64
|
||||
MaxFlatWorkGroupSize: 256
|
||||
NumSpilledSGPRs: 1
|
||||
NumSpilledVGPRs: 1
|
||||
.end_amd_amdgpu_hsa_metadata
|
||||
|
|
Loading…
Reference in New Issue