forked from OSchip/llvm-project
AMDGPU: Remove ability to reserve VGPRs for debugger
Differential Revision: https://reviews.llvm.org/D48234 llvm-svn: 335288
This commit is contained in:
parent
37e9739a58
commit
e004b3d97b
|
@ -652,13 +652,6 @@ def FeatureDebuggerInsertNops : SubtargetFeature<
|
||||||
"Insert one nop instruction for each high level source statement"
|
"Insert one nop instruction for each high level source statement"
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def FeatureDebuggerReserveRegs : SubtargetFeature<
|
|
||||||
"amdgpu-debugger-reserve-regs",
|
|
||||||
"DebuggerReserveRegs",
|
|
||||||
"true",
|
|
||||||
"Reserve registers for debugger usage"
|
|
||||||
>;
|
|
||||||
|
|
||||||
def FeatureDebuggerEmitPrologue : SubtargetFeature<
|
def FeatureDebuggerEmitPrologue : SubtargetFeature<
|
||||||
"amdgpu-debugger-emit-prologue",
|
"amdgpu-debugger-emit-prologue",
|
||||||
"DebuggerEmitPrologue",
|
"DebuggerEmitPrologue",
|
||||||
|
|
|
@ -474,13 +474,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||||
" NumVGPRsForWavesPerEU: " +
|
" NumVGPRsForWavesPerEU: " +
|
||||||
Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
|
Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
|
||||||
|
|
||||||
OutStreamer->emitRawComment(
|
|
||||||
" ReservedVGPRFirst: " + Twine(CurrentProgramInfo.ReservedVGPRFirst),
|
|
||||||
false);
|
|
||||||
OutStreamer->emitRawComment(
|
|
||||||
" ReservedVGPRCount: " + Twine(CurrentProgramInfo.ReservedVGPRCount),
|
|
||||||
false);
|
|
||||||
|
|
||||||
OutStreamer->emitRawComment(
|
OutStreamer->emitRawComment(
|
||||||
" WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
|
" WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
|
||||||
|
|
||||||
|
@ -831,7 +824,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||||
// unified.
|
// unified.
|
||||||
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
|
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
|
||||||
STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
|
STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
|
||||||
unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF);
|
|
||||||
|
|
||||||
// Check the addressable register limit before we add ExtraSGPRs.
|
// Check the addressable register limit before we add ExtraSGPRs.
|
||||||
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
|
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
|
||||||
|
@ -852,7 +844,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||||
|
|
||||||
// Account for extra SGPRs and VGPRs reserved for debugger use.
|
// Account for extra SGPRs and VGPRs reserved for debugger use.
|
||||||
ProgInfo.NumSGPR += ExtraSGPRs;
|
ProgInfo.NumSGPR += ExtraSGPRs;
|
||||||
ProgInfo.NumVGPR += ExtraVGPRs;
|
|
||||||
|
|
||||||
// Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
|
// Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
|
||||||
// dispatch registers are function args.
|
// dispatch registers are function args.
|
||||||
|
@ -918,10 +909,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||||
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
|
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
|
||||||
STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
|
STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
|
||||||
|
|
||||||
// Record first reserved VGPR and number of reserved VGPRs.
|
|
||||||
ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? ProgInfo.NumVGPR : 0;
|
|
||||||
ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);
|
|
||||||
|
|
||||||
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
|
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
|
||||||
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
|
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
|
||||||
// attribute was requested.
|
// attribute was requested.
|
||||||
|
@ -1196,8 +1183,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
|
||||||
Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
|
Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
|
||||||
Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
|
Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
|
||||||
Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
|
Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
|
||||||
Out.reserved_vgpr_first = CurrentProgramInfo.ReservedVGPRFirst;
|
|
||||||
Out.reserved_vgpr_count = CurrentProgramInfo.ReservedVGPRCount;
|
|
||||||
|
|
||||||
// These alignment values are specified in powers of two, so alignment =
|
// These alignment values are specified in powers of two, so alignment =
|
||||||
// 2^n. The minimum alignment is 2^4 = 16.
|
// 2^n. The minimum alignment is 2^4 = 16.
|
||||||
|
@ -1248,8 +1233,6 @@ AMDGPU::HSAMD::Kernel::DebugProps::Metadata AMDGPUAsmPrinter::getHSADebugProps(
|
||||||
|
|
||||||
HSADebugProps.mDebuggerABIVersion.push_back(1);
|
HSADebugProps.mDebuggerABIVersion.push_back(1);
|
||||||
HSADebugProps.mDebuggerABIVersion.push_back(0);
|
HSADebugProps.mDebuggerABIVersion.push_back(0);
|
||||||
HSADebugProps.mReservedNumVGPRs = ProgramInfo.ReservedVGPRCount;
|
|
||||||
HSADebugProps.mReservedFirstVGPR = ProgramInfo.ReservedVGPRFirst;
|
|
||||||
|
|
||||||
if (STM.debuggerEmitPrologue()) {
|
if (STM.debuggerEmitPrologue()) {
|
||||||
HSADebugProps.mPrivateSegmentBufferSGPR =
|
HSADebugProps.mPrivateSegmentBufferSGPR =
|
||||||
|
|
|
@ -84,13 +84,6 @@ private:
|
||||||
// Number of VGPRs that meets number of waves per execution unit request.
|
// Number of VGPRs that meets number of waves per execution unit request.
|
||||||
uint32_t NumVGPRsForWavesPerEU = 0;
|
uint32_t NumVGPRsForWavesPerEU = 0;
|
||||||
|
|
||||||
// If ReservedVGPRCount is 0 then must be 0. Otherwise, this is the first
|
|
||||||
// fixed VGPR number reserved.
|
|
||||||
uint16_t ReservedVGPRFirst = 0;
|
|
||||||
|
|
||||||
// The number of consecutive VGPRs reserved.
|
|
||||||
uint16_t ReservedVGPRCount = 0;
|
|
||||||
|
|
||||||
// Fixed SGPR number used to hold wave scratch offset for entire kernel
|
// Fixed SGPR number used to hold wave scratch offset for entire kernel
|
||||||
// execution, or std::numeric_limits<uint16_t>::max() if the register is not
|
// execution, or std::numeric_limits<uint16_t>::max() if the register is not
|
||||||
// used or not known.
|
// used or not known.
|
||||||
|
|
|
@ -124,7 +124,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||||
EnableXNACK(false),
|
EnableXNACK(false),
|
||||||
TrapHandler(false),
|
TrapHandler(false),
|
||||||
DebuggerInsertNops(false),
|
DebuggerInsertNops(false),
|
||||||
DebuggerReserveRegs(false),
|
|
||||||
DebuggerEmitPrologue(false),
|
DebuggerEmitPrologue(false),
|
||||||
|
|
||||||
EnableHugePrivateBuffer(false),
|
EnableHugePrivateBuffer(false),
|
||||||
|
@ -550,10 +549,6 @@ unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
|
||||||
unsigned Requested = AMDGPU::getIntegerAttribute(
|
unsigned Requested = AMDGPU::getIntegerAttribute(
|
||||||
F, "amdgpu-num-vgpr", MaxNumVGPRs);
|
F, "amdgpu-num-vgpr", MaxNumVGPRs);
|
||||||
|
|
||||||
// Make sure requested value does not violate subtarget's specifications.
|
|
||||||
if (Requested && Requested <= getReservedNumVGPRs(MF))
|
|
||||||
Requested = 0;
|
|
||||||
|
|
||||||
// Make sure requested value is compatible with values implied by
|
// Make sure requested value is compatible with values implied by
|
||||||
// default/requested minimum/maximum number of waves per execution unit.
|
// default/requested minimum/maximum number of waves per execution unit.
|
||||||
if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
|
if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
|
||||||
|
@ -566,7 +561,7 @@ unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
|
||||||
MaxNumVGPRs = Requested;
|
MaxNumVGPRs = Requested;
|
||||||
}
|
}
|
||||||
|
|
||||||
return MaxNumVGPRs - getReservedNumVGPRs(MF);
|
return MaxNumVGPRs;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
|
@ -124,7 +124,6 @@ protected:
|
||||||
bool EnableXNACK;
|
bool EnableXNACK;
|
||||||
bool TrapHandler;
|
bool TrapHandler;
|
||||||
bool DebuggerInsertNops;
|
bool DebuggerInsertNops;
|
||||||
bool DebuggerReserveRegs;
|
|
||||||
bool DebuggerEmitPrologue;
|
bool DebuggerEmitPrologue;
|
||||||
|
|
||||||
// Used as options.
|
// Used as options.
|
||||||
|
@ -823,18 +822,13 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
bool debuggerSupported() const {
|
bool debuggerSupported() const {
|
||||||
return debuggerInsertNops() && debuggerReserveRegs() &&
|
return debuggerInsertNops() && debuggerEmitPrologue();
|
||||||
debuggerEmitPrologue();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool debuggerInsertNops() const {
|
bool debuggerInsertNops() const {
|
||||||
return DebuggerInsertNops;
|
return DebuggerInsertNops;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool debuggerReserveRegs() const {
|
|
||||||
return DebuggerReserveRegs;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool debuggerEmitPrologue() const {
|
bool debuggerEmitPrologue() const {
|
||||||
return DebuggerEmitPrologue;
|
return DebuggerEmitPrologue;
|
||||||
}
|
}
|
||||||
|
@ -962,11 +956,6 @@ public:
|
||||||
return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
|
return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Reserved number of VGPRs for given function \p MF.
|
|
||||||
unsigned getReservedNumVGPRs(const MachineFunction &MF) const {
|
|
||||||
return debuggerReserveRegs() ? 4 : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Maximum number of VGPRs that meets number of waves per execution
|
/// \returns Maximum number of VGPRs that meets number of waves per execution
|
||||||
/// unit requirement for function \p MF, or number of VGPRs explicitly
|
/// unit requirement for function \p MF, or number of VGPRs explicitly
|
||||||
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
|
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
|
||||||
|
|
|
@ -85,7 +85,6 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
|
||||||
AMDGPU::FeatureAutoWaitcntBeforeBarrier,
|
AMDGPU::FeatureAutoWaitcntBeforeBarrier,
|
||||||
AMDGPU::FeatureDebuggerEmitPrologue,
|
AMDGPU::FeatureDebuggerEmitPrologue,
|
||||||
AMDGPU::FeatureDebuggerInsertNops,
|
AMDGPU::FeatureDebuggerInsertNops,
|
||||||
AMDGPU::FeatureDebuggerReserveRegs,
|
|
||||||
|
|
||||||
// Property of the kernel/environment which can't actually differ.
|
// Property of the kernel/environment which can't actually differ.
|
||||||
AMDGPU::FeatureSGPRInitBug,
|
AMDGPU::FeatureSGPRInitBug,
|
||||||
|
|
|
@ -1,64 +0,0 @@
|
||||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s | FileCheck %s
|
|
||||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx900 -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s | FileCheck %s
|
|
||||||
target datalayout = "A5"
|
|
||||||
; CHECK: reserved_vgpr_first = {{[0-9]+}}
|
|
||||||
; CHECK-NEXT: reserved_vgpr_count = 4
|
|
||||||
; CHECK: ReservedVGPRFirst: {{[0-9]+}}
|
|
||||||
; CHECK-NEXT: ReservedVGPRCount: 4
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
|
||||||
define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !12 {
|
|
||||||
entry:
|
|
||||||
%A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
|
|
||||||
store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
|
|
||||||
call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !17, metadata !18), !dbg !19
|
|
||||||
%0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !20
|
|
||||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20
|
|
||||||
store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21
|
|
||||||
%1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !22
|
|
||||||
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22
|
|
||||||
store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23
|
|
||||||
%2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !24
|
|
||||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2, !dbg !24
|
|
||||||
store i32 3, i32 addrspace(1)* %arrayidx2, align 4, !dbg !25
|
|
||||||
ret void, !dbg !26
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: nounwind readnone
|
|
||||||
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
|
|
||||||
|
|
||||||
attributes #0 = { nounwind }
|
|
||||||
attributes #1 = { nounwind readnone }
|
|
||||||
|
|
||||||
!llvm.dbg.cu = !{!0}
|
|
||||||
!opencl.kernels = !{!3}
|
|
||||||
!llvm.module.flags = !{!9, !10}
|
|
||||||
!llvm.ident = !{!11}
|
|
||||||
|
|
||||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
|
||||||
!1 = !DIFile(filename: "test01.cl", directory: "/home/kzhuravl/Lightning/testing")
|
|
||||||
!2 = !{}
|
|
||||||
!3 = !{void (i32 addrspace(1)*)* @test, !4, !5, !6, !7, !8}
|
|
||||||
!4 = !{!"kernel_arg_addr_space", i32 1}
|
|
||||||
!5 = !{!"kernel_arg_access_qual", !"none"}
|
|
||||||
!6 = !{!"kernel_arg_type", !"int addrspace(5)*"}
|
|
||||||
!7 = !{!"kernel_arg_base_type", !"int addrspace(5)*"}
|
|
||||||
!8 = !{!"kernel_arg_type_qual", !""}
|
|
||||||
!9 = !{i32 2, !"Dwarf Version", i32 2}
|
|
||||||
!10 = !{i32 2, !"Debug Info Version", i32 3}
|
|
||||||
!11 = !{!"clang version 3.9.0 (trunk 268929)"}
|
|
||||||
!12 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !13, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
|
|
||||||
!13 = !DISubroutineType(types: !14)
|
|
||||||
!14 = !{null, !15}
|
|
||||||
!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64, align: 32)
|
|
||||||
!16 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
|
|
||||||
!17 = !DILocalVariable(name: "A", arg: 1, scope: !12, file: !1, line: 1, type: !15)
|
|
||||||
!18 = !DIExpression()
|
|
||||||
!19 = !DILocation(line: 1, column: 30, scope: !12)
|
|
||||||
!20 = !DILocation(line: 2, column: 3, scope: !12)
|
|
||||||
!21 = !DILocation(line: 2, column: 8, scope: !12)
|
|
||||||
!22 = !DILocation(line: 3, column: 3, scope: !12)
|
|
||||||
!23 = !DILocation(line: 3, column: 8, scope: !12)
|
|
||||||
!24 = !DILocation(line: 4, column: 3, scope: !12)
|
|
||||||
!25 = !DILocation(line: 4, column: 8, scope: !12)
|
|
||||||
!26 = !DILocation(line: 5, column: 1, scope: !12)
|
|
|
@ -13,10 +13,6 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
|
||||||
; CHECK: SymbolName: 'test@kd'
|
; CHECK: SymbolName: 'test@kd'
|
||||||
; CHECK: DebugProps:
|
; CHECK: DebugProps:
|
||||||
; CHECK: DebuggerABIVersion: [ 1, 0 ]
|
; CHECK: DebuggerABIVersion: [ 1, 0 ]
|
||||||
; CHECK: ReservedNumVGPRs: 4
|
|
||||||
; GFX700: ReservedFirstVGPR: 8
|
|
||||||
; GFX802: ReservedFirstVGPR: 8
|
|
||||||
; GFX900: ReservedFirstVGPR: 10
|
|
||||||
; CHECK: PrivateSegmentBufferSGPR: 0
|
; CHECK: PrivateSegmentBufferSGPR: 0
|
||||||
; CHECK: WavefrontPrivateSegmentOffsetSGPR: 11
|
; CHECK: WavefrontPrivateSegmentOffsetSGPR: 11
|
||||||
define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 {
|
define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 {
|
||||||
|
|
Loading…
Reference in New Issue