AMDGPU: Don't emit amd_kernel_code_t for callable functions

This is inserted directly in the text section. The relocation
for the function ends up resolving to the beginning of the
amd_kernel_code_t header rather than the actual function
entry point.

Also skip some of the comments for initialization
that only makes sense for kernels.

llvm-svn: 300736
This commit is contained in:
Matt Arsenault 2017-04-19 19:38:10 +00:00
parent 8a76f915ae
commit 021a218dd2
2 changed files with 21 additions and 8 deletions

View File

@ -144,6 +144,10 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
}
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>();
if (!MFI->isEntryFunction())
return;
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
amd_kernel_code_t KernelCode;
@ -222,13 +226,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->SwitchSection(CommentSection);
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
if (MFI->isEntryFunction()) {
OutStreamer->emitRawComment(" Kernel info:", false);
} else {
OutStreamer->emitRawComment(" Function info:", false);
}
OutStreamer->emitRawComment(" codeLenInByte = " +
Twine(getFunctionCodeSize(MF)), false);
OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
false);
OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
false);
OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
false);
OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
@ -238,6 +248,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
" bytes/workgroup (compile time only)", false);
if (!MFI->isEntryFunction())
return false;
OutStreamer->emitRawComment(" SGPRBlocks: " +
Twine(KernelInfo.SGPRBlocks), false);
OutStreamer->emitRawComment(" VGPRBlocks: " +

View File

@ -27,7 +27,7 @@
; ELF: Symbol {
; ELF: Name: simple
; ELF: Size: 292
; ELF: Size: 44
; ELF: Type: Function (0x2)
; ELF: }
@ -40,11 +40,10 @@
; HSA: .globl simple
; HSA: .p2align 2
; HSA: {{^}}simple:
; HSA: .amd_kernel_code_t
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: .end_amd_kernel_code_t
; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
; HSA-NOT: amd_kernel_code_t
; FIXME: Check this isn't a kernarg load when calling convention implemented.
; XHSA-NOT: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
; Make sure we are setting the ATC bit:
; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000
@ -55,7 +54,8 @@
; HSA: .Lfunc_end0:
; HSA: .size simple, .Lfunc_end0-simple
; HSA: ; Function info:
; HSA-NOT: COMPUTE_PGM_RSRC2
define void @simple(i32 addrspace(1)* %out) {
entry:
store i32 0, i32 addrspace(1)* %out