forked from OSchip/llvm-project
[AMDGPU] Add remarks to output some resource usage
Add analyis remarks to output kernel name, register usage, occupancy, scratch usage, spills, and LDS information. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D123878
This commit is contained in:
parent
56796ae1a8
commit
67357739c6
|
@ -0,0 +1,17 @@
|
|||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null
|
||||
|
||||
// expected-remark@+9 {{Function Name: foo}}
|
||||
// expected-remark@+8 {{ SGPRs: 9}}
|
||||
// expected-remark@+7 {{ VGPRs: 10}}
|
||||
// expected-remark@+6 {{ AGPRs: 12}}
|
||||
// expected-remark@+5 {{ ScratchSize [bytes/lane]: 0}}
|
||||
// expected-remark@+4 {{ Occupancy [waves/SIMD]: 10}}
|
||||
// expected-remark@+3 {{ SGPRs Spill: 0}}
|
||||
// expected-remark@+2 {{ VGPRs Spill: 0}}
|
||||
// expected-remark@+1 {{ LDS Size [bytes/block]: 0}}
|
||||
__kernel void foo() {
|
||||
__asm volatile ("; clobber s8" :::"s8");
|
||||
__asm volatile ("; clobber v9" :::"v9");
|
||||
__asm volatile ("; clobber a11" :::"a11");
|
||||
}
|
|
@ -27,8 +27,10 @@
|
|||
#include "SIMachineFunctionInfo.h"
|
||||
#include "TargetInfo/AMDGPUTargetInfo.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
|
||||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
#include "llvm/MC/MCAssembler.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
|
@ -506,6 +508,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
emitFunctionBody();
|
||||
|
||||
emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
|
||||
STM.hasMAIInsts());
|
||||
|
||||
if (isVerbose()) {
|
||||
MCSectionELF *CommentSection =
|
||||
Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
|
||||
|
@ -875,6 +880,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
|||
LDSAlignShift = 9;
|
||||
}
|
||||
|
||||
ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();
|
||||
ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();
|
||||
|
||||
ProgInfo.LDSSize = MFI->getLDSSize();
|
||||
ProgInfo.LDSBlocks =
|
||||
alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
|
||||
|
@ -1180,3 +1188,58 @@ void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
|
|||
AU.addPreserved<AMDGPUResourceUsageAnalysis>();
|
||||
AsmPrinter::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::emitResourceUsageRemarks(
|
||||
const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,
|
||||
bool isModuleEntryFunction, bool hasMAIInsts) {
|
||||
if (!ORE)
|
||||
return;
|
||||
|
||||
const char *Name = "kernel-resource-usage";
|
||||
const char *Indent = " ";
|
||||
|
||||
// If the remark is not specifically enabled, do not output to yaml
|
||||
LLVMContext &Ctx = MF.getFunction().getContext();
|
||||
if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
|
||||
return;
|
||||
|
||||
auto EmitResourceUsageRemark = [&](StringRef RemarkName,
|
||||
StringRef RemarkLabel, auto Argument) {
|
||||
// Add an indent for every line besides the line with the kernel name. This
|
||||
// makes it easier to tell which resource usage go with which kernel since
|
||||
// the kernel name will always be displayed first.
|
||||
std::string LabelStr = RemarkLabel.str() + ": ";
|
||||
if (!RemarkName.equals("FunctionName"))
|
||||
LabelStr = Indent + LabelStr;
|
||||
|
||||
ORE->emit([&]() {
|
||||
return MachineOptimizationRemarkAnalysis(Name, RemarkName,
|
||||
MF.getFunction().getSubprogram(),
|
||||
&MF.front())
|
||||
<< LabelStr << ore::NV(RemarkName, Argument);
|
||||
});
|
||||
};
|
||||
|
||||
// FIXME: Formatting here is pretty nasty because clang does not accept
|
||||
// newlines from diagnostics. This forces us to emit multiple diagnostic
|
||||
// remarks to simulate newlines. If and when clang does accept newlines, this
|
||||
// formatting should be aggregated into one remark with newlines to avoid
|
||||
// printing multiple diagnostic location and diag opts.
|
||||
EmitResourceUsageRemark("FunctionName", "Function Name",
|
||||
MF.getFunction().getName());
|
||||
EmitResourceUsageRemark("NumSGPR", "SGPRs", CurrentProgramInfo.NumSGPR);
|
||||
EmitResourceUsageRemark("NumVGPR", "VGPRs", CurrentProgramInfo.NumArchVGPR);
|
||||
if (hasMAIInsts)
|
||||
EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
|
||||
EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
|
||||
CurrentProgramInfo.ScratchSize);
|
||||
EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
|
||||
CurrentProgramInfo.Occupancy);
|
||||
EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
|
||||
CurrentProgramInfo.SGPRSpill);
|
||||
EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",
|
||||
CurrentProgramInfo.VGPRSpill);
|
||||
if (isModuleEntryFunction)
|
||||
EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",
|
||||
CurrentProgramInfo.LDSSize);
|
||||
}
|
||||
|
|
|
@ -69,6 +69,9 @@ private:
|
|||
uint64_t ScratchSize,
|
||||
uint64_t CodeSize,
|
||||
const AMDGPUMachineFunction* MFI);
|
||||
void emitResourceUsageRemarks(const MachineFunction &MF,
|
||||
const SIProgramInfo &CurrentProgramInfo,
|
||||
bool isModuleEntryFunction, bool hasMAIInsts);
|
||||
|
||||
uint16_t getAmdhsaKernelCodeProperties(
|
||||
const MachineFunction &MF) const;
|
||||
|
|
|
@ -49,6 +49,8 @@ struct SIProgramInfo {
|
|||
uint32_t AccumOffset = 0;
|
||||
uint32_t TgSplit = 0;
|
||||
uint32_t NumSGPR = 0;
|
||||
unsigned SGPRSpill = 0;
|
||||
unsigned VGPRSpill = 0;
|
||||
uint32_t LDSSize = 0;
|
||||
bool FlatUsed = false;
|
||||
|
||||
|
|
|
@ -0,0 +1,158 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s
|
||||
; RUN: FileCheck -check-prefix=REMARK %s < %t
|
||||
|
||||
; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
|
||||
; STDERR-NEXT: remark: foo.cl:27:0: SGPRs: 24
|
||||
; STDERR-NEXT: remark: foo.cl:27:0: VGPRs: 9
|
||||
; STDERR-NEXT: remark: foo.cl:27:0: AGPRs: 43
|
||||
; STDERR-NEXT: remark: foo.cl:27:0: ScratchSize [bytes/lane]: 0
|
||||
; STDERR-NEXT: remark: foo.cl:27:0: Occupancy [waves/SIMD]: 5
|
||||
; STDERR-NEXT: remark: foo.cl:27:0: SGPRs Spill: 0
|
||||
; STDERR-NEXT: remark: foo.cl:27:0: VGPRs Spill: 0
|
||||
; STDERR-NEXT: remark: foo.cl:27:0: LDS Size [bytes/block]: 512
|
||||
|
||||
; REMARK-LABEL: --- !Analysis
|
||||
; REMARK: Pass: kernel-resource-usage
|
||||
; REMARK-NEXT: Name: FunctionName
|
||||
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
|
||||
; REMARK-NEXT: Function: test_kernel
|
||||
; REMARK-NEXT: Args:
|
||||
; REMARK-NEXT: - String: 'Function Name: '
|
||||
; REMARK-NEXT: - FunctionName: test_kernel
|
||||
; REMARK-NEXT: ...
|
||||
; REMARK-NEXT: --- !Analysis
|
||||
; REMARK-NEXT: Pass: kernel-resource-usage
|
||||
; REMARK-NEXT: Name: NumSGPR
|
||||
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
|
||||
; REMARK-NEXT: Function: test_kernel
|
||||
; REMARK-NEXT: Args:
|
||||
; REMARK-NEXT: - String: ' SGPRs: '
|
||||
; REMARK-NEXT: - NumSGPR: '24'
|
||||
; REMARK-NEXT: ...
|
||||
; REMARK-NEXT: --- !Analysis
|
||||
; REMARK-NEXT: Pass: kernel-resource-usage
|
||||
; REMARK-NEXT: Name: NumVGPR
|
||||
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
|
||||
; REMARK-NEXT: Function: test_kernel
|
||||
; REMARK-NEXT: Args:
|
||||
; REMARK-NEXT: - String: ' VGPRs: '
|
||||
; REMARK-NEXT: - NumVGPR: '9'
|
||||
; REMARK-NEXT: ...
|
||||
; REMARK-NEXT: --- !Analysis
|
||||
; REMARK-NEXT: Pass: kernel-resource-usage
|
||||
; REMARK-NEXT: Name: NumAGPR
|
||||
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
|
||||
; REMARK-NEXT: Function: test_kernel
|
||||
; REMARK-NEXT: Args:
|
||||
; REMARK-NEXT: - String: ' AGPRs: '
|
||||
; REMARK-NEXT: - NumAGPR: '43'
|
||||
; REMARK-NEXT: ...
|
||||
; REMARK-NEXT: --- !Analysis
|
||||
; REMARK-NEXT: Pass: kernel-resource-usage
|
||||
; REMARK-NEXT: Name: ScratchSize
|
||||
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
|
||||
; REMARK-NEXT: Function: test_kernel
|
||||
; REMARK-NEXT: Args:
|
||||
; REMARK-NEXT: - String: ' ScratchSize [bytes/lane]: '
|
||||
; REMARK-NEXT: - ScratchSize: '0'
|
||||
; REMARK-NEXT: ...
|
||||
; REMARK-NEXT: --- !Analysis
|
||||
; REMARK-NEXT: Pass: kernel-resource-usage
|
||||
; REMARK-NEXT: Name: Occupancy
|
||||
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
|
||||
; REMARK-NEXT: Function: test_kernel
|
||||
; REMARK-NEXT: Args:
|
||||
; REMARK-NEXT: - String: ' Occupancy [waves/SIMD]: '
|
||||
; REMARK-NEXT: - Occupancy: '5'
|
||||
; REMARK-NEXT: ...
|
||||
; REMARK-NEXT: --- !Analysis
|
||||
; REMARK-NEXT: Pass: kernel-resource-usage
|
||||
; REMARK-NEXT: Name: SGPRSpill
|
||||
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
|
||||
; REMARK-NEXT: Function: test_kernel
|
||||
; REMARK-NEXT: Args:
|
||||
; REMARK-NEXT: - String: ' SGPRs Spill: '
|
||||
; REMARK-NEXT: - SGPRSpill: '0'
|
||||
; REMARK-NEXT: ...
|
||||
; REMARK-NEXT: --- !Analysis
|
||||
; REMARK-NEXT: Pass: kernel-resource-usage
|
||||
; REMARK-NEXT: Name: VGPRSpill
|
||||
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
|
||||
; REMARK-NEXT: Function: test_kernel
|
||||
; REMARK-NEXT: Args:
|
||||
; REMARK-NEXT: - String: ' VGPRs Spill: '
|
||||
; REMARK-NEXT: - VGPRSpill: '0'
|
||||
; REMARK-NEXT: ...
|
||||
; REMARK-NEXT: --- !Analysis
|
||||
; REMARK-NEXT: Pass: kernel-resource-usage
|
||||
; REMARK-NEXT: Name: BytesLDS
|
||||
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
|
||||
; REMARK-NEXT: Function: test_kernel
|
||||
; REMARK-NEXT: Args:
|
||||
; REMARK-NEXT: - String: ' LDS Size [bytes/block]: '
|
||||
; REMARK-NEXT: - BytesLDS: '512'
|
||||
; REMARK-NEXT: ...
|
||||
|
||||
@lds = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
|
||||
|
||||
define amdgpu_kernel void @test_kernel() !dbg !3 {
|
||||
call void asm sideeffect "; clobber v8", "~{v8}"()
|
||||
call void asm sideeffect "; clobber s23", "~{s23}"()
|
||||
call void asm sideeffect "; clobber a42", "~{a42}"()
|
||||
call void asm sideeffect "; use $0", "v"([128 x i32] addrspace(3)* @lds)
|
||||
ret void
|
||||
}
|
||||
|
||||
; STDERR: remark: foo.cl:42:0: Function Name: test_func
|
||||
; STDERR-NEXT: remark: foo.cl:42:0: SGPRs: 0
|
||||
; STDERR-NEXT: remark: foo.cl:42:0: VGPRs: 0
|
||||
; STDERR-NEXT: remark: foo.cl:42:0: AGPRs: 0
|
||||
; STDERR-NEXT: remark: foo.cl:42:0: ScratchSize [bytes/lane]: 0
|
||||
; STDERR-NEXT: remark: foo.cl:42:0: Occupancy [waves/SIMD]: 0
|
||||
; STDERR-NEXT: remark: foo.cl:42:0: SGPRs Spill: 0
|
||||
; STDERR-NEXT: remark: foo.cl:42:0: VGPRs Spill: 0
|
||||
; STDERR-NOT: LDS Size
|
||||
define void @test_func() !dbg !6 {
|
||||
call void asm sideeffect "; clobber v17", "~{v17}"()
|
||||
call void asm sideeffect "; clobber s11", "~{s11}"()
|
||||
call void asm sideeffect "; clobber a9", "~{a9}"()
|
||||
ret void
|
||||
}
|
||||
|
||||
; STDERR: remark: foo.cl:8:0: Function Name: empty_kernel
|
||||
; STDERR-NEXT: remark: foo.cl:8:0: SGPRs: 0
|
||||
; STDERR-NEXT: remark: foo.cl:8:0: VGPRs: 0
|
||||
; STDERR-NEXT: remark: foo.cl:8:0: AGPRs: 0
|
||||
; STDERR-NEXT: remark: foo.cl:8:0: ScratchSize [bytes/lane]: 0
|
||||
; STDERR-NEXT: remark: foo.cl:8:0: Occupancy [waves/SIMD]: 10
|
||||
; STDERR-NEXT: remark: foo.cl:8:0: SGPRs Spill: 0
|
||||
; STDERR-NEXT: remark: foo.cl:8:0: VGPRs Spill: 0
|
||||
; STDERR-NEXT: remark: foo.cl:8:0: LDS Size [bytes/block]: 0
|
||||
define amdgpu_kernel void @empty_kernel() !dbg !7 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; STDERR: remark: foo.cl:52:0: Function Name: empty_func
|
||||
; STDERR-NEXT: remark: foo.cl:52:0: SGPRs: 0
|
||||
; STDERR-NEXT: remark: foo.cl:52:0: VGPRs: 0
|
||||
; STDERR-NEXT: remark: foo.cl:52:0: AGPRs: 0
|
||||
; STDERR-NEXT: remark: foo.cl:52:0: ScratchSize [bytes/lane]: 0
|
||||
; STDERR-NEXT: remark: foo.cl:52:0: Occupancy [waves/SIMD]: 0
|
||||
; STDERR-NEXT: remark: foo.cl:52:0: SGPRs Spill: 0
|
||||
; STDERR-NEXT: remark: foo.cl:52:0: VGPRs Spill: 0
|
||||
define void @empty_func() !dbg !8 {
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!2}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
|
||||
!1 = !DIFile(filename: "foo.cl", directory: "/tmp")
|
||||
!2 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!3 = distinct !DISubprogram(name: "test_kernel", scope: !1, file: !1, type: !4, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
|
||||
!4 = !DISubroutineType(types: !5)
|
||||
!5 = !{null}
|
||||
!6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
|
||||
!7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
|
||||
!8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
|
Loading…
Reference in New Issue