forked from OSchip/llvm-project
R600/SI: Add comments for number of used registers.
llvm-svn: 196467
This commit is contained in:
parent
d473363876
commit
89cc49fe5d
|
@ -46,8 +46,7 @@ extern "C" void LLVMInitializeR600AsmPrinter() {
|
|||
}
|
||||
|
||||
/// Construct the printer on top of the generic AsmPrinter for \p TM and
/// \p Streamer.
///
/// DisasmEnabled is set only when the subtarget's dumpCode() is true and the
/// streamer reports no raw-text support.
// NOTE(review): this is a diff rendering; the two forms of the base
// initializer below appear to be the pre- and post-change lines of the same
// statement -- confirm against the real source before editing.
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
|
||||
: AsmPrinter(TM, Streamer)
|
||||
{
|
||||
: AsmPrinter(TM, Streamer) {
|
||||
DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode() &&
|
||||
! Streamer.hasRawTextSupport();
|
||||
}
|
||||
|
@ -56,6 +55,7 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
|
|||
/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
|
||||
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
SetupMachineFunction(MF);
|
||||
|
||||
if (OutStreamer.hasRawTextSupport()) {
|
||||
OutStreamer.EmitRawText("@" + MF.getName() + ":");
|
||||
}
|
||||
|
@ -65,9 +65,12 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
|||
ELF::SHT_PROGBITS, 0,
|
||||
SectionKind::getReadOnly());
|
||||
OutStreamer.SwitchSection(ConfigSection);
|
||||
|
||||
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
SIProgramInfo KernelInfo;
|
||||
if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
EmitProgramInfoSI(MF);
|
||||
findNumUsedRegistersSI(MF, KernelInfo.NumSGPR, KernelInfo.NumVGPR);
|
||||
EmitProgramInfoSI(MF, KernelInfo);
|
||||
} else {
|
||||
EmitProgramInfoR600(MF);
|
||||
}
|
||||
|
@ -79,6 +82,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
|||
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
|
||||
EmitFunctionBody();
|
||||
|
||||
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
|
||||
const MCSectionELF *CommentSection
|
||||
= Context.getELFSection(".AMDGPU.csdata",
|
||||
ELF::SHT_PROGBITS, 0,
|
||||
SectionKind::getReadOnly());
|
||||
OutStreamer.SwitchSection(CommentSection);
|
||||
|
||||
OutStreamer.EmitRawText(
|
||||
Twine("; Kernel info:\n") +
|
||||
"; NumSgprs: " + Twine(KernelInfo.NumSGPR) + "\n" +
|
||||
"; NumVgprs: " + Twine(KernelInfo.NumVGPR) + "\n");
|
||||
}
|
||||
|
||||
if (STM.dumpCode()) {
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
MF.dump();
|
||||
|
@ -166,8 +182,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
|
|||
}
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
|
||||
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF,
|
||||
unsigned &NumSGPR,
|
||||
unsigned &NumVGPR) const {
|
||||
unsigned MaxSGPR = 0;
|
||||
unsigned MaxVGPR = 0;
|
||||
bool VCCUsed = false;
|
||||
|
@ -252,10 +269,24 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
|
|||
}
|
||||
}
|
||||
}
|
||||
if (VCCUsed) {
|
||||
|
||||
if (VCCUsed)
|
||||
MaxSGPR += 2;
|
||||
}
|
||||
SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
NumSGPR = MaxSGPR;
|
||||
NumVGPR = MaxVGPR;
|
||||
}
|
||||
|
||||
/// Fill \p Out with the register usage of \p MF.
///
/// This only forwards to findNumUsedRegistersSI(), which writes
/// Out.NumSGPR and Out.NumVGPR; no other state is touched here.
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &Out,
|
||||
MachineFunction &MF) const {
|
||||
findNumUsedRegistersSI(MF, Out.NumSGPR, Out.NumVGPR);
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
|
||||
const SIProgramInfo &KernelInfo) {
|
||||
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
unsigned RsrcReg;
|
||||
switch (MFI->ShaderType) {
|
||||
default: // Fall through
|
||||
|
@ -266,7 +297,8 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
|
|||
}
|
||||
|
||||
OutStreamer.EmitIntValue(RsrcReg, 4);
|
||||
OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4);
|
||||
OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
|
||||
S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
|
||||
|
||||
unsigned LDSAlignShift;
|
||||
if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
|
||||
|
|
|
@ -22,6 +22,21 @@
|
|||
namespace llvm {
|
||||
|
||||
class AMDGPUAsmPrinter : public AsmPrinter {
|
||||
private:
|
||||
/// \brief Register usage of a function on SI targets.
///
/// The counts are produced by findNumUsedRegistersSI() and consumed by
/// EmitProgramInfoSI() as well as by the verbose "Kernel info" comment that
/// runOnMachineFunction() emits.
struct SIProgramInfo {
  /// Start from zero so that an instance which is never filled in (the
  /// non-SI path of runOnMachineFunction() declares one but does not call
  /// findNumUsedRegistersSI()) is still safe to read.
  SIProgramInfo() : NumSGPR(0), NumVGPR(0) {}

  /// Number of SGPRs reported for the function.
  unsigned NumSGPR;
  /// Number of VGPRs reported for the function.
  unsigned NumVGPR;
};
|
||||
|
||||
/// \brief Fill \p Out with the SGPR/VGPR counts that
/// findNumUsedRegistersSI() computes for \p MF.
void getSIProgramInfo(SIProgramInfo &Out, MachineFunction &MF) const;
|
||||
/// \brief Report the register usage of \p MF through the \p NumSGPR and
/// \p NumVGPR out-parameters.
// NOTE(review): only part of the implementation is visible in this diff; the
// visible tail adds 2 to the SGPR count when VCC is used before writing the
// results -- confirm the rest against the full source.
void findNumUsedRegistersSI(MachineFunction &MF,
|
||||
unsigned &NumSGPR,
|
||||
unsigned &NumVGPR) const;
|
||||
|
||||
/// \brief Emit register usage information so that the GPU driver
|
||||
/// can correctly set up the GPU state.
|
||||
void EmitProgramInfoR600(MachineFunction &MF);
|
||||
void EmitProgramInfoSI(MachineFunction &MF, const SIProgramInfo &KernelInfo);
|
||||
|
||||
public:
|
||||
explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
|
||||
|
@ -32,11 +47,6 @@ public:
|
|||
return "AMDGPU Assembly Printer";
|
||||
}
|
||||
|
||||
/// \brief Emit register usage information so that the GPU driver
|
||||
/// can correctly set up the GPU state.
|
||||
void EmitProgramInfoR600(MachineFunction &MF);
|
||||
void EmitProgramInfoSI(MachineFunction &MF);
|
||||
|
||||
/// Implemented in AMDGPUMCInstLower.cpp
|
||||
virtual void EmitInstruction(const MachineInstr *MI);
|
||||
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.SI.tid() nounwind readnone
|
||||
|
||||
; SI-LABEL: @foo:
|
||||
; SI: .section .AMDGPU.csdata
|
||||
; SI: ; Kernel info:
|
||||
; SI: ; NumSgprs: {{[0-9]+}}
|
||||
; SI: ; NumVgprs: {{[0-9]+}}
|
||||
; Test kernel: for the index returned by llvm.SI.tid(), load one i32 from
; %abase and one from %bbase, add them, and store the sum to %out at the
; same index. The checks above only require that the kernel-info comment
; block with register counts is printed for this function.
define void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 addrspace(1)* %bbase) nounwind {
|
||||
%tid = call i32 @llvm.SI.tid() nounwind readnone
|
||||
%aptr = getelementptr i32 addrspace(1)* %abase, i32 %tid
|
||||
%bptr = getelementptr i32 addrspace(1)* %bbase, i32 %tid
|
||||
%outptr = getelementptr i32 addrspace(1)* %out, i32 %tid
|
||||
%a = load i32 addrspace(1)* %aptr, align 4
|
||||
%b = load i32 addrspace(1)* %bptr, align 4
|
||||
%result = add i32 %a, %b
|
||||
store i32 %result, i32 addrspace(1)* %outptr, align 4
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue