forked from OSchip/llvm-project
[AMDGPU] Increase kernel padding
To support prefetch mode 3, we need to pad the current cacheline and fill 3 cachelines after it. The current padding is only sufficient for mode 2. Differential Revision: https://reviews.llvm.org/D65236 llvm-svn: 366938
This commit is contained in:
parent
65217a4fa9
commit
c43784ff26
|
@ -250,7 +250,7 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
|
|||
bool AMDGPUTargetAsmStreamer::EmitCodeEnd() {
|
||||
const uint32_t Encoded_s_code_end = 0xbf9f0000;
|
||||
OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n';
|
||||
OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n';
|
||||
OS << "\t.fill 48, 4, " << Encoded_s_code_end << '\n';
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -602,7 +602,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
|
|||
MCStreamer &OS = getStreamer();
|
||||
OS.PushSection();
|
||||
OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
|
||||
for (unsigned I = 0; I < 32; ++I)
|
||||
for (unsigned I = 0; I < 48; ++I)
|
||||
OS.EmitIntValue(Encoded_s_code_end, 4);
|
||||
OS.PopSection();
|
||||
return true;
|
||||
|
|
|
@ -35,47 +35,14 @@ define amdgpu_kernel void @a_kernel2() {
|
|||
; GCN-ASM-NEXT: [[END_LABEL3:\.Lfunc_end.*]]:
|
||||
; GCN-ASM-NEXT: .size a_function, [[END_LABEL3]]-a_function
|
||||
; GFX10END-ASM: .p2alignl 6, 3214868480
|
||||
; GFX10END-ASM-NEXT: .fill 32, 4, 3214868480
|
||||
; GFX10END-ASM-NEXT: .fill 48, 4, 3214868480
|
||||
; GFX10NOEND-NOT: .fill
|
||||
|
||||
; GFX10NOEND-OBJ-NOT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
|
||||
; GFX10END-OBJ: s_code_end // 000000000140:
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-NEXT: s_code_end
|
||||
; GFX10END-OBJ-COUNT-47: s_code_end
|
||||
|
||||
define void @a_function() {
|
||||
ret void
|
||||
|
|
Loading…
Reference in New Issue