forked from OSchip/llvm-project
AMDGPU: Don't emit amd_kernel_code_t for callable functions
The amd_kernel_code_t header is inserted directly in the text section, so the relocation for the function ends up resolving to the beginning of the amd_kernel_code_t header rather than the actual function entry point. Also skip some of the comments for initialization that only make sense for kernels. llvm-svn: 300736
This commit is contained in:
parent
8a76f915ae
commit
021a218dd2
|
@ -144,6 +144,10 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
|
||||||
}
|
}
|
||||||
|
|
||||||
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
||||||
|
const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>();
|
||||||
|
if (!MFI->isEntryFunction())
|
||||||
|
return;
|
||||||
|
|
||||||
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
|
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
|
||||||
SIProgramInfo KernelInfo;
|
SIProgramInfo KernelInfo;
|
||||||
amd_kernel_code_t KernelCode;
|
amd_kernel_code_t KernelCode;
|
||||||
|
@ -222,13 +226,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||||
OutStreamer->SwitchSection(CommentSection);
|
OutStreamer->SwitchSection(CommentSection);
|
||||||
|
|
||||||
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
|
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
|
||||||
OutStreamer->emitRawComment(" Kernel info:", false);
|
if (MFI->isEntryFunction()) {
|
||||||
|
OutStreamer->emitRawComment(" Kernel info:", false);
|
||||||
|
} else {
|
||||||
|
OutStreamer->emitRawComment(" Function info:", false);
|
||||||
|
}
|
||||||
|
|
||||||
OutStreamer->emitRawComment(" codeLenInByte = " +
|
OutStreamer->emitRawComment(" codeLenInByte = " +
|
||||||
Twine(getFunctionCodeSize(MF)), false);
|
Twine(getFunctionCodeSize(MF)), false);
|
||||||
OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
|
OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
|
||||||
false);
|
false);
|
||||||
OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
|
OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
|
||||||
false);
|
false);
|
||||||
|
|
||||||
OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
|
OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
|
||||||
false);
|
false);
|
||||||
OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
|
OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
|
||||||
|
@ -238,6 +248,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||||
OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
|
OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
|
||||||
" bytes/workgroup (compile time only)", false);
|
" bytes/workgroup (compile time only)", false);
|
||||||
|
|
||||||
|
if (!MFI->isEntryFunction())
|
||||||
|
return false;
|
||||||
|
|
||||||
OutStreamer->emitRawComment(" SGPRBlocks: " +
|
OutStreamer->emitRawComment(" SGPRBlocks: " +
|
||||||
Twine(KernelInfo.SGPRBlocks), false);
|
Twine(KernelInfo.SGPRBlocks), false);
|
||||||
OutStreamer->emitRawComment(" VGPRBlocks: " +
|
OutStreamer->emitRawComment(" VGPRBlocks: " +
|
||||||
|
|
|
@ -27,7 +27,7 @@
|
||||||
|
|
||||||
; ELF: Symbol {
|
; ELF: Symbol {
|
||||||
; ELF: Name: simple
|
; ELF: Name: simple
|
||||||
; ELF: Size: 292
|
; ELF: Size: 44
|
||||||
; ELF: Type: Function (0x2)
|
; ELF: Type: Function (0x2)
|
||||||
; ELF: }
|
; ELF: }
|
||||||
|
|
||||||
|
@ -40,11 +40,10 @@
|
||||||
; HSA: .globl simple
|
; HSA: .globl simple
|
||||||
; HSA: .p2align 2
|
; HSA: .p2align 2
|
||||||
; HSA: {{^}}simple:
|
; HSA: {{^}}simple:
|
||||||
; HSA: .amd_kernel_code_t
|
; HSA-NOT: amd_kernel_code_t
|
||||||
; HSA: enable_sgpr_private_segment_buffer = 1
|
|
||||||
; HSA: enable_sgpr_kernarg_segment_ptr = 1
|
; FIXME: Check this isn't a kernarg load when calling convention implemented.
|
||||||
; HSA: .end_amd_kernel_code_t
|
; XHSA-NOT: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
|
||||||
; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
|
|
||||||
|
|
||||||
; Make sure we are setting the ATC bit:
|
; Make sure we are setting the ATC bit:
|
||||||
; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000
|
; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000
|
||||||
|
@ -55,7 +54,8 @@
|
||||||
|
|
||||||
; HSA: .Lfunc_end0:
|
; HSA: .Lfunc_end0:
|
||||||
; HSA: .size simple, .Lfunc_end0-simple
|
; HSA: .size simple, .Lfunc_end0-simple
|
||||||
|
; HSA: ; Function info:
|
||||||
|
; HSA-NOT: COMPUTE_PGM_RSRC2
|
||||||
define void @simple(i32 addrspace(1)* %out) {
|
define void @simple(i32 addrspace(1)* %out) {
|
||||||
entry:
|
entry:
|
||||||
store i32 0, i32 addrspace(1)* %out
|
store i32 0, i32 addrspace(1)* %out
|
||||||
|
|
Loading…
Reference in New Issue