forked from OSchip/llvm-project
[AMDGPU] gfx1010 s_code_end generation
Also add some missing metadata in the streamer. Differential Revision: https://reviews.llvm.org/D61531 llvm-svn: 359937
This commit is contained in:
parent
be7138b467
commit
41bbe101a2
|
@ -295,6 +295,12 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
|
|||
|
||||
/// Module-level finalization hook.
///
/// Discards the accumulated call-graph resource info and, when the global
/// subtarget is GFX10, switches the output streamer to the text section and
/// asks the target streamer to emit its code-end sequence there. The generic
/// AsmPrinter finalization then runs, and its result is returned unchanged.
///
/// \param M the module being finalized; forwarded to the base class.
/// \returns whatever AsmPrinter::doFinalization(M) returns.
bool AMDGPUAsmPrinter::doFinalization(Module &M) {
  CallGraphResourceInfo.clear();

  const bool NeedsCodeEnd = AMDGPU::isGFX10(*getGlobalSTI());
  if (NeedsCodeEnd) {
    // The code-end sequence must land in the text section, so select it
    // explicitly before emitting.
    OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
    getTargetStreamer()->EmitCodeEnd();
  }

  return AsmPrinter::doFinalization(M);
}
|
||||
|
||||
|
@ -928,6 +934,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
|||
1ULL << ScratchAlignShift) >>
|
||||
ScratchAlignShift;
|
||||
|
||||
if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
|
||||
ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
|
||||
ProgInfo.MemOrdered = 1;
|
||||
}
|
||||
|
||||
ProgInfo.ComputePGMRSrc1 =
|
||||
S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
|
||||
S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
|
||||
|
@ -936,7 +947,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
|||
S_00B848_PRIV(ProgInfo.Priv) |
|
||||
S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
|
||||
S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
|
||||
S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
|
||||
S_00B848_IEEE_MODE(ProgInfo.IEEEMode) |
|
||||
S_00B848_WGP_MODE(ProgInfo.WgpMode) |
|
||||
S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
|
||||
|
||||
// 0 = X, 1 = XY, 2 = XYZ
|
||||
unsigned TIDIGCompCnt = 0;
|
||||
|
@ -1077,7 +1090,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
|
|||
Out.compute_pgm_resource_registers =
|
||||
CurrentProgramInfo.ComputePGMRSrc1 |
|
||||
(CurrentProgramInfo.ComputePGMRSrc2 << 32);
|
||||
Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
|
||||
Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
|
||||
|
||||
if (CurrentProgramInfo.DynamicCallStack)
|
||||
Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
|
||||
|
|
|
@ -235,6 +235,13 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Writes the textual code-end sequence to the assembly output.
///
/// Two directives are printed, both using the encoded s_code_end word as the
/// fill value (printed in decimal): a `.p2alignl 6` and a `.fill` of 32
/// four-byte values.
///
/// \returns always true.
bool AMDGPUTargetAsmStreamer::EmitCodeEnd() {
  const uint32_t SCodeEndWord = 0xbf9f0000;
  OS << "\t.p2alignl 6, " << SCodeEndWord << '\n'
     << "\t.fill 32, 4, " << SCodeEndWord << '\n';
  return true;
}
|
||||
|
||||
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
|
||||
const MCSubtargetInfo &STI, StringRef KernelName,
|
||||
const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
|
||||
|
@ -552,6 +559,18 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
|
|||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
|
||||
const uint32_t Encoded_s_code_end = 0xbf9f0000;
|
||||
|
||||
MCStreamer &OS = getStreamer();
|
||||
OS.PushSection();
|
||||
OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
|
||||
for (unsigned I = 0; I < 32; ++I)
|
||||
OS.EmitIntValue(Encoded_s_code_end, 4);
|
||||
OS.PopSection();
|
||||
return true;
|
||||
}
|
||||
|
||||
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
|
||||
const MCSubtargetInfo &STI, StringRef KernelName,
|
||||
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
|
||||
|
|
|
@ -74,6 +74,9 @@ public:
|
|||
/// \returns True on success, false on failure.
|
||||
virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
virtual bool EmitCodeEnd() = 0;
|
||||
|
||||
virtual void EmitAmdhsaKernelDescriptor(
|
||||
const MCSubtargetInfo &STI, StringRef KernelName,
|
||||
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
|
||||
|
@ -113,6 +116,9 @@ public:
|
|||
/// \returns True on success, false on failure.
|
||||
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitCodeEnd() override;
|
||||
|
||||
void EmitAmdhsaKernelDescriptor(
|
||||
const MCSubtargetInfo &STI, StringRef KernelName,
|
||||
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
|
||||
|
@ -155,6 +161,9 @@ public:
|
|||
/// \returns True on success, false on failure.
|
||||
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitCodeEnd() override;
|
||||
|
||||
void EmitAmdhsaKernelDescriptor(
|
||||
const MCSubtargetInfo &STI, StringRef KernelName,
|
||||
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
|
||||
|
|
|
@ -28,6 +28,8 @@ struct SIProgramInfo {
|
|||
uint32_t DX10Clamp = 0;
|
||||
uint32_t DebugMode = 0;
|
||||
uint32_t IEEEMode = 0;
|
||||
uint32_t WgpMode = 0; // GFX10+
|
||||
uint32_t MemOrdered = 0; // GFX10+
|
||||
uint64_t ScratchSize = 0;
|
||||
|
||||
uint64_t ComputePGMRSrc1 = 0;
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10,GFX10-ASM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump -arch=amdgcn -mcpu=gfx1010 -disassemble - | FileCheck -check-prefixes=GCN,GCN-OBJ,GFX10,GFX10-OBJ %s
|
||||
|
||||
; GCN: a_kernel1:
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GCN-ASM-NEXT: [[END_LABEL1:\.Lfunc_end.*]]:
|
||||
; GCN-ASM-NEXT: .size a_kernel1, [[END_LABEL1]]-a_kernel1
|
||||
; GCN-ASM: .section .AMDGPU.config
|
||||
|
||||
; GCN-OBJ-NEXT: s_nop 0
|
||||
|
||||
; First test kernel: takes no arguments and returns immediately.
define amdgpu_kernel void @a_kernel1() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN: a_kernel2:
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GCN-ASM-NEXT: [[END_LABEL2:\.Lfunc_end.*]]:
|
||||
; GCN-ASM-NEXT: .size a_kernel2, [[END_LABEL2]]-a_kernel2
|
||||
; GCN-ASM: .section .AMDGPU.config
|
||||
|
||||
; GCN-OBJ-NEXT: {{^$}}
|
||||
|
||||
; Second test kernel: takes no arguments and returns immediately.
define amdgpu_kernel void @a_kernel2() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-ASM: .text
|
||||
; GCN-ASM-NEXT: .globl a_function
|
||||
; GCN-ASM-NEXT: .p2align 2
|
||||
; GCN-ASM-NEXT: .type a_function,@function
|
||||
|
||||
; GCN-NEXT: a_function:
|
||||
; GCN: s_setpc_b64
|
||||
; GCN-ASM-NEXT: [[END_LABEL3:\.Lfunc_end.*]]:
|
||||
; GCN-ASM-NEXT: .size a_function, [[END_LABEL3]]-a_function
|
||||
; GFX10-ASM: .p2alignl 6, 3214868480
|
||||
; GFX10-ASM-NEXT: .fill 32, 4, 3214868480
|
||||
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
|
||||
; GFX10-OBJ: s_code_end // 000000000140:
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
; GFX10-OBJ-NEXT: s_code_end
|
||||
|
||||
; Plain function (no amdgpu_kernel calling convention) that returns immediately.
define void @a_function() {
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue