forked from OSchip/llvm-project
AMDGPU: Add num spilled s/vgprs to metadata
This was requested by tools. Differential Revision: https://reviews.llvm.org/D40321 llvm-svn: 319192
This commit is contained in:
parent
5011298958
commit
06ae4ec78e
|
@ -1295,6 +1295,16 @@ non-AMD key names should be prefixed by "*vendor-name*.".
|
||||||
code is capable of
|
code is capable of
|
||||||
supporting XNACK. See
|
supporting XNACK. See
|
||||||
:ref:`amdgpu-target-features`.
|
:ref:`amdgpu-target-features`.
|
||||||
|
"NumSpilledSGPRs" integer Number of stores from
|
||||||
|
a scalar register to
|
||||||
|
a register allocator
|
||||||
|
created spill
|
||||||
|
location.
|
||||||
|
"NumSpilledVGPRs" integer Number of stores from
|
||||||
|
a vector register to
|
||||||
|
a register allocator
|
||||||
|
created spill
|
||||||
|
location.
|
||||||
============================ ============== ========= =====================
|
============================ ============== ========= =====================
|
||||||
|
|
||||||
..
|
..
|
||||||
|
|
|
@ -244,6 +244,10 @@ constexpr char MaxFlatWorkGroupSize[] = "MaxFlatWorkGroupSize";
|
||||||
constexpr char IsDynamicCallStack[] = "IsDynamicCallStack";
|
constexpr char IsDynamicCallStack[] = "IsDynamicCallStack";
|
||||||
/// \brief Key for Kernel::CodeProps::Metadata::mIsXNACKEnabled.
|
/// \brief Key for Kernel::CodeProps::Metadata::mIsXNACKEnabled.
|
||||||
constexpr char IsXNACKEnabled[] = "IsXNACKEnabled";
|
constexpr char IsXNACKEnabled[] = "IsXNACKEnabled";
|
||||||
|
/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledSGPRs.
|
||||||
|
constexpr char NumSpilledSGPRs[] = "NumSpilledSGPRs";
|
||||||
|
/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledVGPRs.
|
||||||
|
constexpr char NumSpilledVGPRs[] = "NumSpilledVGPRs";
|
||||||
} // end namespace Key
|
} // end namespace Key
|
||||||
|
|
||||||
/// \brief In-memory representation of kernel code properties metadata.
|
/// \brief In-memory representation of kernel code properties metadata.
|
||||||
|
@ -275,6 +279,10 @@ struct Metadata final {
|
||||||
/// \brief True if the generated machine code is capable of supporting XNACK.
|
/// \brief True if the generated machine code is capable of supporting XNACK.
|
||||||
/// Optional.
|
/// Optional.
|
||||||
bool mIsXNACKEnabled = false;
|
bool mIsXNACKEnabled = false;
|
||||||
|
/// \brief Number of SGPRs spilled by a wavefront. Optional.
|
||||||
|
uint16_t mNumSpilledSGPRs = 0;
|
||||||
|
/// \brief Number of VGPRs spilled by a workitem. Optional.
|
||||||
|
uint16_t mNumSpilledVGPRs = 0;
|
||||||
|
|
||||||
/// \brief Default constructor.
|
/// \brief Default constructor.
|
||||||
Metadata() = default;
|
Metadata() = default;
|
||||||
|
|
|
@ -148,6 +148,10 @@ struct MappingTraits<Kernel::CodeProps::Metadata> {
|
||||||
MD.mIsDynamicCallStack, false);
|
MD.mIsDynamicCallStack, false);
|
||||||
YIO.mapOptional(Kernel::CodeProps::Key::IsXNACKEnabled,
|
YIO.mapOptional(Kernel::CodeProps::Key::IsXNACKEnabled,
|
||||||
MD.mIsXNACKEnabled, false);
|
MD.mIsXNACKEnabled, false);
|
||||||
|
YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledSGPRs,
|
||||||
|
MD.mNumSpilledSGPRs, uint16_t(0));
|
||||||
|
YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledVGPRs,
|
||||||
|
MD.mNumSpilledVGPRs, uint16_t(0));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1188,6 +1188,8 @@ AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
|
||||||
HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
|
HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
|
||||||
HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
|
HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
|
||||||
HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
|
HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
|
||||||
|
HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
|
||||||
|
HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
|
||||||
|
|
||||||
return HSACodeProps;
|
return HSACodeProps;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,26 +1,26 @@
|
||||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
|
||||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
|
||||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
|
||||||
|
|
||||||
|
@var = addrspace(1) global float 0.0
|
||||||
|
|
||||||
; CHECK: ---
|
; CHECK: ---
|
||||||
; CHECK: Version: [ 1, 0 ]
|
; CHECK: Version: [ 1, 0 ]
|
||||||
|
|
||||||
; CHECK: Kernels:
|
; CHECK: Kernels:
|
||||||
; CHECK: - Name: test
|
|
||||||
; CHECK: SymbolName: 'test@kd'
|
; CHECK: - Name: test
|
||||||
; CHECK: CodeProps:
|
; CHECK: SymbolName: 'test@kd'
|
||||||
; CHECK: KernargSegmentSize: 24
|
; CHECK: CodeProps:
|
||||||
; CHECK: GroupSegmentFixedSize: 0
|
; CHECK: KernargSegmentSize: 24
|
||||||
; CHECK: PrivateSegmentFixedSize: 0
|
; CHECK: GroupSegmentFixedSize: 0
|
||||||
; CHECK: KernargSegmentAlign: 8
|
; CHECK: PrivateSegmentFixedSize: 0
|
||||||
; CHECK: WavefrontSize: 64
|
; CHECK: KernargSegmentAlign: 8
|
||||||
; GFX700: NumSGPRs: 6
|
; CHECK: WavefrontSize: 64
|
||||||
; GFX800: NumSGPRs: 96
|
; CHECK: NumSGPRs: 6
|
||||||
; GFX900: NumSGPRs: 6
|
; GFX700: NumVGPRs: 4
|
||||||
; GFX700: NumVGPRs: 4
|
; GFX803: NumVGPRs: 6
|
||||||
; GFX800: NumVGPRs: 6
|
; GFX900: NumVGPRs: 6
|
||||||
; GFX900: NumVGPRs: 6
|
; CHECK: MaxFlatWorkGroupSize: 256
|
||||||
; CHECK: MaxFlatWorkGroupSize: 256
|
|
||||||
define amdgpu_kernel void @test(
|
define amdgpu_kernel void @test(
|
||||||
half addrspace(1)* %r,
|
half addrspace(1)* %r,
|
||||||
half addrspace(1)* %a,
|
half addrspace(1)* %a,
|
||||||
|
@ -32,3 +32,111 @@ entry:
|
||||||
store half %r.val, half addrspace(1)* %r
|
store half %r.val, half addrspace(1)* %r
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK: - Name: num_spilled_sgprs
|
||||||
|
; CHECK: SymbolName: 'num_spilled_sgprs@kd'
|
||||||
|
; CHECK: CodeProps:
|
||||||
|
; CHECK: NumSpilledSGPRs: 41
|
||||||
|
define amdgpu_kernel void @num_spilled_sgprs(
|
||||||
|
i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %out2,
|
||||||
|
i32 addrspace(1)* %out3, i32 addrspace(1)* %out4, i32 addrspace(1)* %out5,
|
||||||
|
i32 addrspace(1)* %out6, i32 addrspace(1)* %out7, i32 addrspace(1)* %out8,
|
||||||
|
i32 addrspace(1)* %out9, i32 addrspace(1)* %outa, i32 addrspace(1)* %outb,
|
||||||
|
i32 addrspace(1)* %outc, i32 addrspace(1)* %outd, i32 addrspace(1)* %oute,
|
||||||
|
i32 addrspace(1)* %outf, i32 %in0, i32 %in1, i32 %in2, i32 %in3, i32 %in4,
|
||||||
|
i32 %in5, i32 %in6, i32 %in7, i32 %in8, i32 %in9, i32 %ina, i32 %inb,
|
||||||
|
i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 {
|
||||||
|
entry:
|
||||||
|
store i32 %in0, i32 addrspace(1)* %out0
|
||||||
|
store i32 %in1, i32 addrspace(1)* %out1
|
||||||
|
store i32 %in2, i32 addrspace(1)* %out2
|
||||||
|
store i32 %in3, i32 addrspace(1)* %out3
|
||||||
|
store i32 %in4, i32 addrspace(1)* %out4
|
||||||
|
store i32 %in5, i32 addrspace(1)* %out5
|
||||||
|
store i32 %in6, i32 addrspace(1)* %out6
|
||||||
|
store i32 %in7, i32 addrspace(1)* %out7
|
||||||
|
store i32 %in8, i32 addrspace(1)* %out8
|
||||||
|
store i32 %in9, i32 addrspace(1)* %out9
|
||||||
|
store i32 %ina, i32 addrspace(1)* %outa
|
||||||
|
store i32 %inb, i32 addrspace(1)* %outb
|
||||||
|
store i32 %inc, i32 addrspace(1)* %outc
|
||||||
|
store i32 %ind, i32 addrspace(1)* %outd
|
||||||
|
store i32 %ine, i32 addrspace(1)* %oute
|
||||||
|
store i32 %inf, i32 addrspace(1)* %outf
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: - Name: num_spilled_vgprs
|
||||||
|
; CHECK: SymbolName: 'num_spilled_vgprs@kd'
|
||||||
|
; CHECK: CodeProps:
|
||||||
|
; CHECK: NumSpilledVGPRs: 14
|
||||||
|
define amdgpu_kernel void @num_spilled_vgprs() #1 {
|
||||||
|
%val0 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val1 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val2 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val3 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val4 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val5 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val6 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val7 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val8 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val9 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val10 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val11 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val12 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val13 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val14 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val15 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val16 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val17 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val18 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val19 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val20 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val21 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val22 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val23 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val24 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val25 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val26 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val27 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val28 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val29 = load volatile float, float addrspace(1)* @var
|
||||||
|
%val30 = load volatile float, float addrspace(1)* @var
|
||||||
|
|
||||||
|
store volatile float %val0, float addrspace(1)* @var
|
||||||
|
store volatile float %val1, float addrspace(1)* @var
|
||||||
|
store volatile float %val2, float addrspace(1)* @var
|
||||||
|
store volatile float %val3, float addrspace(1)* @var
|
||||||
|
store volatile float %val4, float addrspace(1)* @var
|
||||||
|
store volatile float %val5, float addrspace(1)* @var
|
||||||
|
store volatile float %val6, float addrspace(1)* @var
|
||||||
|
store volatile float %val7, float addrspace(1)* @var
|
||||||
|
store volatile float %val8, float addrspace(1)* @var
|
||||||
|
store volatile float %val9, float addrspace(1)* @var
|
||||||
|
store volatile float %val10, float addrspace(1)* @var
|
||||||
|
store volatile float %val11, float addrspace(1)* @var
|
||||||
|
store volatile float %val12, float addrspace(1)* @var
|
||||||
|
store volatile float %val13, float addrspace(1)* @var
|
||||||
|
store volatile float %val14, float addrspace(1)* @var
|
||||||
|
store volatile float %val15, float addrspace(1)* @var
|
||||||
|
store volatile float %val16, float addrspace(1)* @var
|
||||||
|
store volatile float %val17, float addrspace(1)* @var
|
||||||
|
store volatile float %val18, float addrspace(1)* @var
|
||||||
|
store volatile float %val19, float addrspace(1)* @var
|
||||||
|
store volatile float %val20, float addrspace(1)* @var
|
||||||
|
store volatile float %val21, float addrspace(1)* @var
|
||||||
|
store volatile float %val22, float addrspace(1)* @var
|
||||||
|
store volatile float %val23, float addrspace(1)* @var
|
||||||
|
store volatile float %val24, float addrspace(1)* @var
|
||||||
|
store volatile float %val25, float addrspace(1)* @var
|
||||||
|
store volatile float %val26, float addrspace(1)* @var
|
||||||
|
store volatile float %val27, float addrspace(1)* @var
|
||||||
|
store volatile float %val28, float addrspace(1)* @var
|
||||||
|
store volatile float %val29, float addrspace(1)* @var
|
||||||
|
store volatile float %val30, float addrspace(1)* @var
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { "amdgpu-num-sgpr"="14" }
|
||||||
|
attributes #1 = { "amdgpu-num-vgpr"="20" }
|
||||||
|
|
|
@ -14,6 +14,8 @@
|
||||||
// CHECK: KernargSegmentAlign: 16
|
// CHECK: KernargSegmentAlign: 16
|
||||||
// CHECK: WavefrontSize: 64
|
// CHECK: WavefrontSize: 64
|
||||||
// CHECK: MaxFlatWorkGroupSize: 256
|
// CHECK: MaxFlatWorkGroupSize: 256
|
||||||
|
// CHECK: NumSpilledSGPRs: 1
|
||||||
|
// CHECK: NumSpilledVGPRs: 1
|
||||||
.amd_amdgpu_hsa_metadata
|
.amd_amdgpu_hsa_metadata
|
||||||
Version: [ 1, 0 ]
|
Version: [ 1, 0 ]
|
||||||
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
|
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
|
||||||
|
@ -27,4 +29,6 @@
|
||||||
KernargSegmentAlign: 16
|
KernargSegmentAlign: 16
|
||||||
WavefrontSize: 64
|
WavefrontSize: 64
|
||||||
MaxFlatWorkGroupSize: 256
|
MaxFlatWorkGroupSize: 256
|
||||||
|
NumSpilledSGPRs: 1
|
||||||
|
NumSpilledVGPRs: 1
|
||||||
.end_amd_amdgpu_hsa_metadata
|
.end_amd_amdgpu_hsa_metadata
|
||||||
|
|
Loading…
Reference in New Issue