forked from OSchip/llvm-project
AMDGPU: Don't emit amd_kernel_code_t for callable functions
The amd_kernel_code_t header is inserted directly in the text section, so the relocation for the function ends up resolving to the beginning of the amd_kernel_code_t header rather than the actual function entry point. Also skip some of the initialization comments that only make sense for kernels. llvm-svn: 300736
This commit is contained in:
parent
8a76f915ae
commit
021a218dd2
|
@ -144,6 +144,10 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
|
|||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
||||
const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>();
|
||||
if (!MFI->isEntryFunction())
|
||||
return;
|
||||
|
||||
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
|
||||
SIProgramInfo KernelInfo;
|
||||
amd_kernel_code_t KernelCode;
|
||||
|
@ -222,13 +226,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
|||
OutStreamer->SwitchSection(CommentSection);
|
||||
|
||||
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
|
||||
if (MFI->isEntryFunction()) {
|
||||
OutStreamer->emitRawComment(" Kernel info:", false);
|
||||
} else {
|
||||
OutStreamer->emitRawComment(" Function info:", false);
|
||||
}
|
||||
|
||||
OutStreamer->emitRawComment(" codeLenInByte = " +
|
||||
Twine(getFunctionCodeSize(MF)), false);
|
||||
OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
|
||||
false);
|
||||
OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
|
||||
false);
|
||||
|
||||
OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
|
||||
false);
|
||||
OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
|
||||
|
@ -238,6 +248,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
|||
OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
|
||||
" bytes/workgroup (compile time only)", false);
|
||||
|
||||
if (!MFI->isEntryFunction())
|
||||
return false;
|
||||
|
||||
OutStreamer->emitRawComment(" SGPRBlocks: " +
|
||||
Twine(KernelInfo.SGPRBlocks), false);
|
||||
OutStreamer->emitRawComment(" VGPRBlocks: " +
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
|
||||
; ELF: Symbol {
|
||||
; ELF: Name: simple
|
||||
; ELF: Size: 292
|
||||
; ELF: Size: 44
|
||||
; ELF: Type: Function (0x2)
|
||||
; ELF: }
|
||||
|
||||
|
@ -40,11 +40,10 @@
|
|||
; HSA: .globl simple
|
||||
; HSA: .p2align 2
|
||||
; HSA: {{^}}simple:
|
||||
; HSA: .amd_kernel_code_t
|
||||
; HSA: enable_sgpr_private_segment_buffer = 1
|
||||
; HSA: enable_sgpr_kernarg_segment_ptr = 1
|
||||
; HSA: .end_amd_kernel_code_t
|
||||
; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
|
||||
; HSA-NOT: amd_kernel_code_t
|
||||
|
||||
; FIXME: Check this isn't a kernarg load when the calling convention is implemented.
|
||||
; XHSA-NOT: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
|
||||
|
||||
; Make sure we are setting the ATC bit:
|
||||
; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000
|
||||
|
@ -55,7 +54,8 @@
|
|||
|
||||
; HSA: .Lfunc_end0:
|
||||
; HSA: .size simple, .Lfunc_end0-simple
|
||||
|
||||
; HSA: ; Function info:
|
||||
; HSA-NOT: COMPUTE_PGM_RSRC2
|
||||
define void @simple(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
store i32 0, i32 addrspace(1)* %out
|
||||
|
|
Loading…
Reference in New Issue