forked from OSchip/llvm-project
AMDGPU: Convert AMDGPUResourceUsageAnalysis to a Module pass
This is more precise in the face of indirect calls and aliases, although it still assumes the call target is defined somewhere in the current module. This sometimes changes the order in which the functions are printed, and also changes the point at which context errors are printed relative to stdout. This also likely has negative consequences for compile time and memory usage.
This commit is contained in:
parent
935abab65c
commit
4622afa94c
|
@ -25,7 +25,6 @@
|
|||
|
||||
#include "AMDGPUResourceUsageAnalysis.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "GCNSubtarget.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "llvm/Analysis/CallGraph.h"
|
||||
|
@ -98,34 +97,39 @@ int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
|
|||
return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
|
||||
}
|
||||
|
||||
bool AMDGPUResourceUsageAnalysis::runOnSCC(CallGraphSCC &SCC) {
|
||||
bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
|
||||
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
|
||||
if (!TPC)
|
||||
return false;
|
||||
|
||||
TM = static_cast<const GCNTargetMachine *>(&TPC->getTM<TargetMachine>());
|
||||
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
|
||||
const TargetMachine &TM = TPC->getTM<TargetMachine>();
|
||||
bool HasIndirectCall = false;
|
||||
|
||||
for (CallGraphNode *I : SCC) {
|
||||
Function *F = I->getFunction();
|
||||
if (!F || F->isDeclaration())
|
||||
for (Function &F : M) {
|
||||
if (F.isDeclaration())
|
||||
continue;
|
||||
|
||||
MachineModuleInfo &MMI =
|
||||
getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
|
||||
MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
|
||||
MachineFunction *MF = MMI.getMachineFunction(F);
|
||||
assert(MF && "function must have been generated already");
|
||||
|
||||
auto CI = CallGraphResourceInfo.insert(
|
||||
std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
|
||||
std::make_pair(&F, SIFunctionResourceInfo()));
|
||||
SIFunctionResourceInfo &Info = CI.first->second;
|
||||
assert(CI.second && "should only be called once per function");
|
||||
Info = analyzeResourceUsage(MF);
|
||||
Info = analyzeResourceUsage(*MF, TM);
|
||||
HasIndirectCall |= Info.HasIndirectCall;
|
||||
}
|
||||
|
||||
if (HasIndirectCall)
|
||||
propagateIndirectCallRegisterUsage();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
|
||||
AMDGPUResourceUsageAnalysis::analyzeResourceUsage(const MachineFunction &MF) {
|
||||
AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
|
||||
const MachineFunction &MF, const TargetMachine &TM) const {
|
||||
SIFunctionResourceInfo Info;
|
||||
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
@ -471,16 +475,9 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(const MachineFunction &MF) {
|
|||
std::max(CalleeFrameSize,
|
||||
static_cast<uint64_t>(AssumedStackSizeForExternalCall));
|
||||
|
||||
const SIFunctionResourceInfo &WorstCase =
|
||||
getWorstCaseResourceInfo(*MF.getFunction().getParent());
|
||||
MaxSGPR = std::max(WorstCase.NumExplicitSGPR - 1, MaxSGPR);
|
||||
MaxVGPR = std::max(WorstCase.NumVGPR - 1, MaxVGPR);
|
||||
MaxAGPR = std::max(WorstCase.NumAGPR - 1, MaxAGPR);
|
||||
|
||||
// Register usage of indirect calls gets handled later
|
||||
Info.UsesVCC = true;
|
||||
Info.UsesFlatScratch |=
|
||||
WorstCase.UsesFlatScratch && ST.hasFlatAddressSpace();
|
||||
Info.UsesFlatScratch = ST.hasFlatAddressSpace();
|
||||
Info.HasDynamicallySizedStack = true;
|
||||
Info.HasIndirectCall = true;
|
||||
} else {
|
||||
|
@ -509,49 +506,31 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(const MachineFunction &MF) {
|
|||
return Info;
|
||||
}
|
||||
|
||||
const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &
|
||||
AMDGPUResourceUsageAnalysis::getWorstCaseResourceInfo(const Module &M) {
|
||||
if (ModuleWorstCaseInfo)
|
||||
return *ModuleWorstCaseInfo;
|
||||
void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() {
|
||||
// Collect the maximum number of registers from non-hardware-entrypoints.
|
||||
// All these functions are potential targets for indirect calls.
|
||||
int32_t NonKernelMaxSGPRs = 0;
|
||||
int32_t NonKernelMaxVGPRs = 0;
|
||||
int32_t NonKernelMaxAGPRs = 0;
|
||||
|
||||
computeWorstCaseModuleRegisterUsage(M);
|
||||
return *ModuleWorstCaseInfo;
|
||||
}
|
||||
|
||||
/// Find the worst case register usage for all callable functions in the module,
|
||||
/// assuming all reachable functions are defined in the current module.
|
||||
void AMDGPUResourceUsageAnalysis::computeWorstCaseModuleRegisterUsage(
|
||||
const Module &M) {
|
||||
assert(!ModuleWorstCaseInfo);
|
||||
ModuleWorstCaseInfo = SIFunctionResourceInfo();
|
||||
ModuleWorstCaseInfo->UsesVCC = true;
|
||||
ModuleWorstCaseInfo->HasDynamicallySizedStack = true;
|
||||
ModuleWorstCaseInfo->HasRecursion = true;
|
||||
ModuleWorstCaseInfo->HasIndirectCall = true;
|
||||
|
||||
for (const Function &F : M) {
|
||||
if (F.isIntrinsic())
|
||||
continue;
|
||||
|
||||
if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
|
||||
continue;
|
||||
|
||||
const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
|
||||
const int32_t MaxVGPR = ST.getMaxNumVGPRs(F);
|
||||
const int32_t MaxSGPR = ST.getMaxNumSGPRs(F);
|
||||
|
||||
ModuleWorstCaseInfo->NumVGPR =
|
||||
std::max(ModuleWorstCaseInfo->NumVGPR, MaxVGPR);
|
||||
|
||||
if (ST.hasMAIInsts()) {
|
||||
const int32_t MaxAGPR = ST.getMaxNumAGPRs(F);
|
||||
ModuleWorstCaseInfo->NumAGPR =
|
||||
std::max(ModuleWorstCaseInfo->NumAGPR, MaxAGPR);
|
||||
for (const auto &I : CallGraphResourceInfo) {
|
||||
if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) {
|
||||
auto &Info = I.getSecond();
|
||||
NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR);
|
||||
NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR);
|
||||
NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR);
|
||||
}
|
||||
}
|
||||
|
||||
ModuleWorstCaseInfo->NumExplicitSGPR =
|
||||
std::max(ModuleWorstCaseInfo->NumExplicitSGPR, MaxSGPR);
|
||||
|
||||
ModuleWorstCaseInfo->UsesFlatScratch |= ST.hasFlatAddressSpace();
|
||||
// Add register usage for functions with indirect calls.
|
||||
// For calls to unknown functions, we assume the maximum register usage of
|
||||
// all non-hardware-entrypoints in the current module.
|
||||
for (auto &I : CallGraphResourceInfo) {
|
||||
auto &Info = I.getSecond();
|
||||
if (Info.HasIndirectCall) {
|
||||
Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs);
|
||||
Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs);
|
||||
Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,12 +20,11 @@
|
|||
|
||||
namespace llvm {
|
||||
|
||||
class GCNTargetMachine;
|
||||
class GCNSubtarget;
|
||||
class MachineFunction;
|
||||
class TargetMachine;
|
||||
|
||||
struct AMDGPUResourceUsageAnalysis : public CallGraphSCCPass {
|
||||
struct AMDGPUResourceUsageAnalysis : public ModulePass {
|
||||
static char ID;
|
||||
|
||||
public:
|
||||
|
@ -51,15 +50,15 @@ public:
|
|||
int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
|
||||
};
|
||||
|
||||
AMDGPUResourceUsageAnalysis() : CallGraphSCCPass(ID) {}
|
||||
AMDGPUResourceUsageAnalysis() : ModulePass(ID) {}
|
||||
|
||||
bool runOnSCC(CallGraphSCC &SCC) override;
|
||||
|
||||
bool doInitialization(CallGraph &CG) override {
|
||||
bool doInitialization(Module &M) override {
|
||||
CallGraphResourceInfo.clear();
|
||||
return CallGraphSCCPass::doInitialization(CG);
|
||||
return ModulePass::doInitialization(M);
|
||||
}
|
||||
|
||||
bool runOnModule(Module &M) override;
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<MachineModuleInfoWrapperPass>();
|
||||
AU.setPreservesAll();
|
||||
|
@ -72,16 +71,12 @@ public:
|
|||
return Info->getSecond();
|
||||
}
|
||||
|
||||
const SIFunctionResourceInfo &getWorstCaseResourceInfo(const Module &M);
|
||||
|
||||
private:
|
||||
void computeWorstCaseModuleRegisterUsage(const Module &M);
|
||||
SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF,
|
||||
const TargetMachine &TM) const;
|
||||
void propagateIndirectCallRegisterUsage();
|
||||
|
||||
SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF);
|
||||
|
||||
const GCNTargetMachine *TM = nullptr;
|
||||
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
|
||||
Optional<SIFunctionResourceInfo> ModuleWorstCaseInfo;
|
||||
};
|
||||
} // namespace llvm
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURESOURCEUSAGEANALYSIS_H
|
||||
|
|
|
@ -154,23 +154,23 @@ bb:
|
|||
declare void @undef_func()
|
||||
|
||||
; GCN-LABEL: {{^}}kernel_call_undef_func:
|
||||
; GFX908: .amdhsa_next_free_vgpr 128
|
||||
; GFX90A: .amdhsa_next_free_vgpr 512
|
||||
; GFX90A: .amdhsa_accum_offset 256
|
||||
; GFX908: .amdhsa_next_free_vgpr 32
|
||||
; GFX90A: .amdhsa_next_free_vgpr 64
|
||||
; GFX90A: .amdhsa_accum_offset 32
|
||||
; GCN908: NumVgprs: 128
|
||||
; GCN908: NumAgprs: 128
|
||||
; GCN90A: NumVgprs: 256
|
||||
; GCN90A: NumAgprs: 256
|
||||
; GFX908: TotalNumVgprs: 128
|
||||
; GFX90A: TotalNumVgprs: 512
|
||||
; GFX908: VGPRBlocks: 31
|
||||
; GFX90A: VGPRBlocks: 63
|
||||
; GFX908: NumVGPRsForWavesPerEU: 128
|
||||
; GFX90A: NumVGPRsForWavesPerEU: 512
|
||||
; GFX90A: AccumOffset: 256
|
||||
; GFX908: Occupancy: 2
|
||||
; GFX90A: Occupancy: 1
|
||||
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 63
|
||||
; GFX908: TotalNumVgprs: 32
|
||||
; GFX90A: TotalNumVgprs: 64
|
||||
; GFX908: VGPRBlocks: 7
|
||||
; GFX90A: VGPRBlocks: 7
|
||||
; GFX908: NumVGPRsForWavesPerEU: 32
|
||||
; GFX90A: NumVGPRsForWavesPerEU: 64
|
||||
; GFX90A: AccumOffset: 32
|
||||
; GFX908: Occupancy: 8
|
||||
; GFX90A: Occupancy: 8
|
||||
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 7
|
||||
define amdgpu_kernel void @kernel_call_undef_func() #0 {
|
||||
bb:
|
||||
call void @undef_func()
|
||||
|
|
|
@ -144,8 +144,8 @@ attributes #0 = { nounwind }
|
|||
|
||||
; GCN: amdpal.pipelines:
|
||||
; GCN-NEXT: - .registers:
|
||||
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
|
||||
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
|
||||
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
|
||||
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ce{{$}}
|
||||
; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
||||
; GCN-NEXT: .shader_functions:
|
||||
; GCN-NEXT: dynamic_stack:
|
||||
|
@ -178,24 +178,25 @@ attributes #0 = { nounwind }
|
|||
; GCN-NEXT: .vgpr_count: 0x2{{$}}
|
||||
; GCN-NEXT: no_stack_extern_call:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x6c{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; SDAG-NEXT: .vgpr_count: 0x2b{{$}}
|
||||
; GISEL-NEXT: .vgpr_count: 0x34{{$}}
|
||||
; GCN-NEXT: no_stack_extern_call_many_args:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x6c{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
|
||||
; SDAG-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; GISEL-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; SDAG-NEXT: .vgpr_count: 0x2b{{$}}
|
||||
; GISEL-NEXT: .vgpr_count: 0x34{{$}}
|
||||
; GCN-NEXT: no_stack_indirect_call:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x6c{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||
; SDAG-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; GISEL-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; SDAG-NEXT: .vgpr_count: 0x2b{{$}}
|
||||
; GISEL-NEXT: .vgpr_count: 0x34{{$}}
|
||||
; GCN-NEXT: simple_lds:
|
||||
; GCN-NEXT: .lds_size: 0x100{{$}}
|
||||
; GCN-NEXT: .sgpr_count: 0x20{{$}}
|
||||
|
@ -218,17 +219,18 @@ attributes #0 = { nounwind }
|
|||
; GCN-NEXT: .vgpr_count: 0x3{{$}}
|
||||
; GCN-NEXT: simple_stack_extern_call:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x6c{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; SDAG-NEXT: .vgpr_count: 0x2b{{$}}
|
||||
; GISEL-NEXT: .vgpr_count: 0x34{{$}}
|
||||
; GCN-NEXT: simple_stack_indirect_call:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x6c{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
||||
; SDAG-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; GISEL-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; SDAG-NEXT: .vgpr_count: 0x2b{{$}}
|
||||
; GISEL-NEXT: .vgpr_count: 0x34{{$}}
|
||||
; GCN-NEXT: simple_stack_recurse:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GCN-NEXT: .sgpr_count: 0x26{{$}}
|
||||
|
|
|
@ -556,9 +556,8 @@ attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
|
|||
|
||||
; GCN-LABEL: {{^}}f1024:
|
||||
; GFX9: NumVgprs: 64
|
||||
; GFX90A: NumVgprs: 128
|
||||
; GFX90A: NumAgprs: 128
|
||||
; GFX90A: TotalNumVgprs: 256
|
||||
; GFX90A: NumAgprs: 64
|
||||
; GFX90A: TotalNumVgprs: 128
|
||||
; GFX10WGP-WAVE32: NumVgprs: 128
|
||||
; GFX10WGP-WAVE64: NumVgprs: 128
|
||||
; GFX10CU-WAVE32: NumVgprs: 64
|
||||
|
|
|
@ -8,12 +8,12 @@
|
|||
@alias = hidden alias void (), void ()* @aliasee_default
|
||||
|
||||
; ALL-LABEL: {{^}}kernel:
|
||||
; GFX908: .amdhsa_next_free_vgpr 64
|
||||
; GFX908-NEXT: .amdhsa_next_free_sgpr 102
|
||||
; GFX908: .amdhsa_next_free_vgpr 41
|
||||
; GFX908-NEXT: .amdhsa_next_free_sgpr 33
|
||||
|
||||
; GFX90A: .amdhsa_next_free_vgpr 256
|
||||
; GFX90A-NEXT: .amdhsa_next_free_sgpr 102
|
||||
; GFX90A-NEXT: .amdhsa_accum_offset 128
|
||||
; GFX90A: .amdhsa_next_free_vgpr 71
|
||||
; GFX90A-NEXT: .amdhsa_next_free_sgpr 33
|
||||
; GFX90A-NEXT: .amdhsa_accum_offset 44
|
||||
define amdgpu_kernel void @kernel() #0 {
|
||||
bb:
|
||||
call void @alias() #2
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
@alias0 = hidden alias void (), void ()* @aliasee_default_vgpr64_sgpr102
|
||||
|
||||
; CHECK-LABEL: {{^}}kernel0:
|
||||
; CHECK: .amdhsa_next_free_vgpr 64
|
||||
; CHECK-NEXT: .amdhsa_next_free_sgpr 102
|
||||
; CHECK: .amdhsa_next_free_vgpr 53
|
||||
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
|
||||
define amdgpu_kernel void @kernel0() #0 {
|
||||
bb:
|
||||
call void @alias0() #2
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
|
||||
; CHECK-LABEL: {{^}}kernel1:
|
||||
; CHECK: .amdhsa_next_free_vgpr 42
|
||||
; CHECK-NEXT: .amdhsa_next_free_sgpr 74
|
||||
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
|
||||
define amdgpu_kernel void @kernel1() #0 {
|
||||
bb:
|
||||
call void asm sideeffect "; clobber v40 ", "~{v40}"()
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
@alias2 = hidden alias void (), void()* @aliasee_vgpr64_sgpr102
|
||||
|
||||
; CHECK-LABEL: {{^}}kernel2:
|
||||
; CHECK: .amdhsa_next_free_vgpr 64
|
||||
; CHECK-NEXT: .amdhsa_next_free_sgpr 102
|
||||
; CHECK: .amdhsa_next_free_vgpr 53
|
||||
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
|
||||
define amdgpu_kernel void @kernel2() #0 {
|
||||
bb:
|
||||
call void @alias2() #2
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
@alias3 = hidden alias void (), void ()* @aliasee_vgpr256_sgpr102
|
||||
|
||||
; CHECK-LABEL: {{^}}kernel3:
|
||||
; CHECK: .amdhsa_next_free_vgpr 256
|
||||
; CHECK-NEXT: .amdhsa_next_free_sgpr 102
|
||||
; CHECK: .amdhsa_next_free_vgpr 253
|
||||
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
|
||||
define amdgpu_kernel void @kernel3() #0 {
|
||||
bb:
|
||||
call void @alias3() #2
|
||||
|
|
|
@ -227,10 +227,10 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
|
|||
; Make sure there's no assert when a sgpr96 is used.
|
||||
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
|
||||
; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
|
||||
; CI: NumSgprs: 104
|
||||
; VI-NOBUG: NumSgprs: 108
|
||||
; CI: NumSgprs: 84
|
||||
; VI-NOBUG: NumSgprs: 86
|
||||
; VI-BUG: NumSgprs: 96
|
||||
; GCN: NumVgprs: 64
|
||||
; GCN: NumVgprs: 50
|
||||
define amdgpu_kernel void @count_use_sgpr96_external_call() {
|
||||
entry:
|
||||
tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
|
||||
|
@ -241,10 +241,10 @@ entry:
|
|||
; Make sure there's no assert when a sgpr160 is used.
|
||||
; GCN-LABEL: {{^}}count_use_sgpr160_external_call
|
||||
; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
|
||||
; CI: NumSgprs: 104
|
||||
; VI-NOBUG: NumSgprs: 108
|
||||
; CI: NumSgprs: 84
|
||||
; VI-NOBUG: NumSgprs: 86
|
||||
; VI-BUG: NumSgprs: 96
|
||||
; GCN: NumVgprs: 64
|
||||
; GCN: NumVgprs: 50
|
||||
define amdgpu_kernel void @count_use_sgpr160_external_call() {
|
||||
entry:
|
||||
tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
|
||||
|
@ -255,10 +255,10 @@ entry:
|
|||
; Make sure there's no assert when a vgpr160 is used.
|
||||
; GCN-LABEL: {{^}}count_use_vgpr160_external_call
|
||||
; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
|
||||
; CI: NumSgprs: 104
|
||||
; VI-NOBUG: NumSgprs: 108
|
||||
; CI: NumSgprs: 84
|
||||
; VI-NOBUG: NumSgprs: 86
|
||||
; VI-BUG: NumSgprs: 96
|
||||
; GCN: NumVgprs: 64
|
||||
; GCN: NumVgprs: 50
|
||||
define amdgpu_kernel void @count_use_vgpr160_external_call() {
|
||||
entry:
|
||||
tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
|
||||
|
|
|
@ -16,8 +16,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
|
|||
; GCN-NEXT: amd_machine_version_stepping = 0
|
||||
; GCN-NEXT: kernel_code_entry_byte_offset = 256
|
||||
; GCN-NEXT: kernel_code_prefetch_byte_size = 0
|
||||
; GCN-NEXT: granulated_workitem_vgpr_count = 15
|
||||
; GCN-NEXT: granulated_wavefront_sgpr_count = 12
|
||||
; GCN-NEXT: granulated_workitem_vgpr_count = 10
|
||||
; GCN-NEXT: granulated_wavefront_sgpr_count = 8
|
||||
; GCN-NEXT: priority = 0
|
||||
; GCN-NEXT: float_mode = 240
|
||||
; GCN-NEXT: priv = 0
|
||||
|
@ -60,8 +60,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
|
|||
; GCN-NEXT: gds_segment_byte_size = 0
|
||||
; GCN-NEXT: kernarg_segment_byte_size = 64
|
||||
; GCN-NEXT: workgroup_fbarrier_count = 0
|
||||
; GCN-NEXT: wavefront_sgpr_count = 104
|
||||
; GCN-NEXT: workitem_vgpr_count = 64
|
||||
; GCN-NEXT: wavefront_sgpr_count = 68
|
||||
; GCN-NEXT: workitem_vgpr_count = 42
|
||||
; GCN-NEXT: reserved_vgpr_first = 0
|
||||
; GCN-NEXT: reserved_vgpr_count = 0
|
||||
; GCN-NEXT: reserved_sgpr_first = 0
|
||||
|
@ -109,8 +109,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
|
|||
; GISEL-NEXT: amd_machine_version_stepping = 0
|
||||
; GISEL-NEXT: kernel_code_entry_byte_offset = 256
|
||||
; GISEL-NEXT: kernel_code_prefetch_byte_size = 0
|
||||
; GISEL-NEXT: granulated_workitem_vgpr_count = 15
|
||||
; GISEL-NEXT: granulated_wavefront_sgpr_count = 12
|
||||
; GISEL-NEXT: granulated_workitem_vgpr_count = 10
|
||||
; GISEL-NEXT: granulated_wavefront_sgpr_count = 8
|
||||
; GISEL-NEXT: priority = 0
|
||||
; GISEL-NEXT: float_mode = 240
|
||||
; GISEL-NEXT: priv = 0
|
||||
|
@ -153,8 +153,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
|
|||
; GISEL-NEXT: gds_segment_byte_size = 0
|
||||
; GISEL-NEXT: kernarg_segment_byte_size = 64
|
||||
; GISEL-NEXT: workgroup_fbarrier_count = 0
|
||||
; GISEL-NEXT: wavefront_sgpr_count = 104
|
||||
; GISEL-NEXT: workitem_vgpr_count = 64
|
||||
; GISEL-NEXT: wavefront_sgpr_count = 68
|
||||
; GISEL-NEXT: workitem_vgpr_count = 42
|
||||
; GISEL-NEXT: reserved_vgpr_first = 0
|
||||
; GISEL-NEXT: reserved_vgpr_count = 0
|
||||
; GISEL-NEXT: reserved_sgpr_first = 0
|
||||
|
@ -207,8 +207,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
|
|||
; GCN-NEXT: amd_machine_version_stepping = 0
|
||||
; GCN-NEXT: kernel_code_entry_byte_offset = 256
|
||||
; GCN-NEXT: kernel_code_prefetch_byte_size = 0
|
||||
; GCN-NEXT: granulated_workitem_vgpr_count = 15
|
||||
; GCN-NEXT: granulated_wavefront_sgpr_count = 12
|
||||
; GCN-NEXT: granulated_workitem_vgpr_count = 10
|
||||
; GCN-NEXT: granulated_wavefront_sgpr_count = 8
|
||||
; GCN-NEXT: priority = 0
|
||||
; GCN-NEXT: float_mode = 240
|
||||
; GCN-NEXT: priv = 0
|
||||
|
@ -251,8 +251,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
|
|||
; GCN-NEXT: gds_segment_byte_size = 0
|
||||
; GCN-NEXT: kernarg_segment_byte_size = 64
|
||||
; GCN-NEXT: workgroup_fbarrier_count = 0
|
||||
; GCN-NEXT: wavefront_sgpr_count = 104
|
||||
; GCN-NEXT: workitem_vgpr_count = 64
|
||||
; GCN-NEXT: wavefront_sgpr_count = 68
|
||||
; GCN-NEXT: workitem_vgpr_count = 42
|
||||
; GCN-NEXT: reserved_vgpr_first = 0
|
||||
; GCN-NEXT: reserved_vgpr_count = 0
|
||||
; GCN-NEXT: reserved_sgpr_first = 0
|
||||
|
@ -301,8 +301,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
|
|||
; GISEL-NEXT: amd_machine_version_stepping = 0
|
||||
; GISEL-NEXT: kernel_code_entry_byte_offset = 256
|
||||
; GISEL-NEXT: kernel_code_prefetch_byte_size = 0
|
||||
; GISEL-NEXT: granulated_workitem_vgpr_count = 15
|
||||
; GISEL-NEXT: granulated_wavefront_sgpr_count = 12
|
||||
; GISEL-NEXT: granulated_workitem_vgpr_count = 10
|
||||
; GISEL-NEXT: granulated_wavefront_sgpr_count = 8
|
||||
; GISEL-NEXT: priority = 0
|
||||
; GISEL-NEXT: float_mode = 240
|
||||
; GISEL-NEXT: priv = 0
|
||||
|
@ -345,8 +345,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
|
|||
; GISEL-NEXT: gds_segment_byte_size = 0
|
||||
; GISEL-NEXT: kernarg_segment_byte_size = 64
|
||||
; GISEL-NEXT: workgroup_fbarrier_count = 0
|
||||
; GISEL-NEXT: wavefront_sgpr_count = 104
|
||||
; GISEL-NEXT: workitem_vgpr_count = 64
|
||||
; GISEL-NEXT: wavefront_sgpr_count = 68
|
||||
; GISEL-NEXT: workitem_vgpr_count = 42
|
||||
; GISEL-NEXT: reserved_vgpr_first = 0
|
||||
; GISEL-NEXT: reserved_vgpr_count = 0
|
||||
; GISEL-NEXT: reserved_sgpr_first = 0
|
||||
|
|
|
@ -91,6 +91,7 @@ define void @func_call_tail_call() #1 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}void_func_void:
|
||||
define void @void_func_void() noinline {
|
||||
ret void
|
||||
}
|
||||
|
@ -105,6 +106,7 @@ define void @test_funcx2() #0 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wombat:
|
||||
define weak amdgpu_kernel void @wombat(i32* %arg, i32* %arg2) {
|
||||
bb:
|
||||
call void @hoge() #0
|
||||
|
|
|
@ -136,12 +136,12 @@
|
|||
; GCN-O0-NEXT: Branch relaxation pass
|
||||
; GCN-O0-NEXT: Register Usage Information Collector Pass
|
||||
; GCN-O0-NEXT: Live DEBUG_VALUE analysis
|
||||
; GCN-O0-NEXT: Function register usage analysis
|
||||
; GCN-O0-NEXT: FunctionPass Manager
|
||||
; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; GCN-O0-NEXT: Machine Optimization Remark Emitter
|
||||
; GCN-O0-NEXT: AMDGPU Assembly Printer
|
||||
; GCN-O0-NEXT: Free MachineFunction
|
||||
; GCN-O0-NEXT: Function register usage analysis
|
||||
; GCN-O0-NEXT: FunctionPass Manager
|
||||
; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; GCN-O0-NEXT: Machine Optimization Remark Emitter
|
||||
; GCN-O0-NEXT: AMDGPU Assembly Printer
|
||||
; GCN-O0-NEXT: Free MachineFunction
|
||||
; GCN-O0-NEXT:Pass Arguments: -domtree
|
||||
; GCN-O0-NEXT: FunctionPass Manager
|
||||
; GCN-O0-NEXT: Dominator Tree Construction
|
||||
|
@ -388,12 +388,12 @@
|
|||
; GCN-O1-NEXT: Branch relaxation pass
|
||||
; GCN-O1-NEXT: Register Usage Information Collector Pass
|
||||
; GCN-O1-NEXT: Live DEBUG_VALUE analysis
|
||||
; GCN-O1-NEXT: Function register usage analysis
|
||||
; GCN-O1-NEXT: FunctionPass Manager
|
||||
; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; GCN-O1-NEXT: Machine Optimization Remark Emitter
|
||||
; GCN-O1-NEXT: AMDGPU Assembly Printer
|
||||
; GCN-O1-NEXT: Free MachineFunction
|
||||
; GCN-O1-NEXT: Function register usage analysis
|
||||
; GCN-O1-NEXT: FunctionPass Manager
|
||||
; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; GCN-O1-NEXT: Machine Optimization Remark Emitter
|
||||
; GCN-O1-NEXT: AMDGPU Assembly Printer
|
||||
; GCN-O1-NEXT: Free MachineFunction
|
||||
; GCN-O1-NEXT:Pass Arguments: -domtree
|
||||
; GCN-O1-NEXT: FunctionPass Manager
|
||||
; GCN-O1-NEXT: Dominator Tree Construction
|
||||
|
@ -673,12 +673,12 @@
|
|||
; GCN-O1-OPTS-NEXT: Branch relaxation pass
|
||||
; GCN-O1-OPTS-NEXT: Register Usage Information Collector Pass
|
||||
; GCN-O1-OPTS-NEXT: Live DEBUG_VALUE analysis
|
||||
; GCN-O1-OPTS-NEXT: Function register usage analysis
|
||||
; GCN-O1-OPTS-NEXT: FunctionPass Manager
|
||||
; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
|
||||
; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer
|
||||
; GCN-O1-OPTS-NEXT: Free MachineFunction
|
||||
; GCN-O1-OPTS-NEXT: Function register usage analysis
|
||||
; GCN-O1-OPTS-NEXT: FunctionPass Manager
|
||||
; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
|
||||
; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer
|
||||
; GCN-O1-OPTS-NEXT: Free MachineFunction
|
||||
; GCN-O1-OPTS-NEXT:Pass Arguments: -domtree
|
||||
; GCN-O1-OPTS-NEXT: FunctionPass Manager
|
||||
; GCN-O1-OPTS-NEXT: Dominator Tree Construction
|
||||
|
@ -960,12 +960,12 @@
|
|||
; GCN-O2-NEXT: Branch relaxation pass
|
||||
; GCN-O2-NEXT: Register Usage Information Collector Pass
|
||||
; GCN-O2-NEXT: Live DEBUG_VALUE analysis
|
||||
; GCN-O2-NEXT: Function register usage analysis
|
||||
; GCN-O2-NEXT: FunctionPass Manager
|
||||
; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; GCN-O2-NEXT: Machine Optimization Remark Emitter
|
||||
; GCN-O2-NEXT: AMDGPU Assembly Printer
|
||||
; GCN-O2-NEXT: Free MachineFunction
|
||||
; GCN-O2-NEXT: Function register usage analysis
|
||||
; GCN-O2-NEXT: FunctionPass Manager
|
||||
; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; GCN-O2-NEXT: Machine Optimization Remark Emitter
|
||||
; GCN-O2-NEXT: AMDGPU Assembly Printer
|
||||
; GCN-O2-NEXT: Free MachineFunction
|
||||
; GCN-O2-NEXT:Pass Arguments: -domtree
|
||||
; GCN-O2-NEXT: FunctionPass Manager
|
||||
; GCN-O2-NEXT: Dominator Tree Construction
|
||||
|
@ -1260,12 +1260,12 @@
|
|||
; GCN-O3-NEXT: Branch relaxation pass
|
||||
; GCN-O3-NEXT: Register Usage Information Collector Pass
|
||||
; GCN-O3-NEXT: Live DEBUG_VALUE analysis
|
||||
; GCN-O3-NEXT: Function register usage analysis
|
||||
; GCN-O3-NEXT: FunctionPass Manager
|
||||
; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; GCN-O3-NEXT: Machine Optimization Remark Emitter
|
||||
; GCN-O3-NEXT: AMDGPU Assembly Printer
|
||||
; GCN-O3-NEXT: Free MachineFunction
|
||||
; GCN-O3-NEXT: Function register usage analysis
|
||||
; GCN-O3-NEXT: FunctionPass Manager
|
||||
; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; GCN-O3-NEXT: Machine Optimization Remark Emitter
|
||||
; GCN-O3-NEXT: AMDGPU Assembly Printer
|
||||
; GCN-O3-NEXT: Free MachineFunction
|
||||
; GCN-O3-NEXT:Pass Arguments: -domtree
|
||||
; GCN-O3-NEXT: FunctionPass Manager
|
||||
; GCN-O3-NEXT: Dominator Tree Construction
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s
|
||||
; RUN: not llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GCN,GFX900 %s
|
||||
|
||||
; GFX900: couldn't allocate input reg for constraint 'a'
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}max_10_vgprs:
|
||||
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
|
||||
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
|
||||
|
@ -65,8 +68,6 @@ define amdgpu_kernel void @max_10_vgprs(i32 addrspace(1)* %p) #0 {
|
|||
; GFX908: v_accvgpr_read_b32 [[V_REG]], [[A_REG]]
|
||||
; GFX908-NOT: buffer_
|
||||
|
||||
; GFX900: couldn't allocate input reg for constraint 'a'
|
||||
|
||||
; GFX908: NumVgprs: 10
|
||||
; GFX908: ScratchSize: 0
|
||||
; GFX908: VGPRBlocks: 2
|
||||
|
|
|
@ -14,6 +14,9 @@
|
|||
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
|
||||
|
||||
; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (i32 addrspace(1)*): debugtrap handler not supported
|
||||
|
||||
|
||||
declare void @llvm.trap() #0
|
||||
declare void @llvm.debugtrap() #1
|
||||
|
||||
|
@ -54,7 +57,6 @@ define amdgpu_kernel void @hsa_trap(i32 addrspace(1)* nocapture readonly %arg0)
|
|||
; NOMESA-TRAP: .long 47180
|
||||
; NOMESA-TRAP-NEXT: .long 144
|
||||
|
||||
; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (i32 addrspace(1)*): debugtrap handler not supported
|
||||
; GCN-LABEL: {{^}}hsa_debugtrap:
|
||||
; HSA-TRAP: enable_trap_handler = 0
|
||||
; HSA-TRAP: s_trap 3
|
||||
|
|
Loading…
Reference in New Issue