llvm-project/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll

; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=ALL %s

; If spilling to smem, additional registers are used for the resource
; descriptor.

; ALL-LABEL: {{^}}max_9_sgprs:

; ALL: SGPRBlocks: 1
; ALL: NumSGPRsForWavesPerEU: 9
; Kernel under test: takes five addrspace(1) i32 pointers followed by five i32
; scalars and stores each scalar through the pointer in the matching position.
; It carries attribute group #0, which sets "amdgpu-num-sgpr"="14"; the checks
; directly above expect a reported SGPR count of 9 under both check prefixes.
define amdgpu_kernel void @max_9_sgprs(i32 addrspace(1)* %out1,

                          i32 addrspace(1)* %out2,
                          i32 addrspace(1)* %out3,
                          i32 addrspace(1)* %out4,
                          i32 addrspace(1)* %out5,
                          i32 %one, i32 %two, i32 %three, i32 %four, i32 %five) #0 {
  ; One non-volatile store per (scalar, pointer) pair, in argument order.
  store i32 %one, i32 addrspace(1)* %out1
  store i32 %two, i32 addrspace(1)* %out2
  store i32 %three, i32 addrspace(1)* %out3
  store i32 %four, i32 addrspace(1)* %out4
  store i32 %five, i32 addrspace(1)* %out5
  ret void
}

; private resource: 4
; scratch wave offset: 1
; workgroup ids: 3
; dispatch id: 2
; queue ptr: 2
; flat scratch init: 2
; ---------------------
; total: 14

; + reserved vcc = 16

; Because we can't handle re-using the last few input registers as the
; special registers (vcc etc.), and also can't decide to drop the unused
; features once the number of registers is frozen, this ends up using
; more than expected.

; XALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs:
; XTOSGPR: SGPRBlocks: 1
; XTOSGPR: NumSGPRsForWavesPerEU: 16

; XTOSMEM: s_mov_b64 s[10:11], s[2:3]
; XTOSMEM: s_mov_b64 s[8:9], s[0:1]
; XTOSMEM: s_mov_b32 s7, s13

; XTOSMEM: SGPRBlocks: 1
; XTOSMEM: NumSGPRsForWavesPerEU: 16
;
; This test case is disabled: when calculating the spill slot addresses, AMDGPU
; creates an extra vreg to save/restore m0, which at a point of maximum register
; pressure would trigger an endless loop; in practice the compiler aborts earlier
; with "Incomplete scavenging after 2nd pass".
;define amdgpu_kernel void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1,
;                                        i32 addrspace(1)* %out2,
;                                        i32 addrspace(1)* %out3,
;                                        i32 addrspace(1)* %out4,
;                                        i32 %one, i32 %two, i32 %three, i32 %four) #2 {
;  %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
;  %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
;  %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
;  %x.3 = call i64 @llvm.amdgcn.dispatch.id()
;  %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
;  %x.5 = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
;  store volatile i32 0, i32* undef
;  br label %stores
;
;stores:
;  store volatile i32 %x.0, i32 addrspace(1)* undef
;  store volatile i32 %x.0, i32 addrspace(1)* undef
;  store volatile i32 %x.0, i32 addrspace(1)* undef
;  store volatile i64 %x.3, i64 addrspace(1)* undef
;  store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef
;  store volatile i8 addrspace(2)* %x.5, i8 addrspace(2)* addrspace(1)* undef
;
;  store i32 %one, i32 addrspace(1)* %out1
;  store i32 %two, i32 addrspace(1)* %out2
;  store i32 %three, i32 addrspace(1)* %out3
;  store i32 %four, i32 addrspace(1)* %out4
;  ret void
;}

; The following test is commented out for now; http://llvm.org/PR31230
; XALL-LABEL: max_12_sgprs_12_input_sgprs{{$}}
; Make sure copies for the input buffer are not clobbered. This requires
; swapping the order in which the registers are copied relative to what
; normally happens.

; XTOSMEM: s_mov_b32 s5, s11
; XTOSMEM: s_add_u32 m0, s5,
; XTOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0

; XALL: SGPRBlocks: 2
; XALL: NumSGPRsForWavesPerEU: 18
;define amdgpu_kernel void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
;                                        i32 addrspace(1)* %out2,
;                                        i32 addrspace(1)* %out3,
;                                        i32 addrspace(1)* %out4,
;                                        i32 %one, i32 %two, i32 %three, i32 %four) #2 {
;  store volatile i32 0, i32* undef
;  %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
;  store volatile i32 %x.0, i32 addrspace(1)* undef
;  %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
;  store volatile i32 %x.0, i32 addrspace(1)* undef
;  %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
;  store volatile i32 %x.0, i32 addrspace(1)* undef
;  %x.3 = call i64 @llvm.amdgcn.dispatch.id()
;  store volatile i64 %x.3, i64 addrspace(1)* undef
;  %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
;  store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef
;
;  store i32 %one, i32 addrspace(1)* %out1
;  store i32 %two, i32 addrspace(1)* %out2
;  store i32 %three, i32 addrspace(1)* %out3
;  store i32 %four, i32 addrspace(1)* %out4
;  ret void
;}

; Intrinsic declarations. In the visible part of this file they are called only
; from the commented-out kernels; @max_9_sgprs does not use any of them.
declare i32 @llvm.amdgcn.workgroup.id.x() #1
declare i32 @llvm.amdgcn.workgroup.id.y() #1
declare i32 @llvm.amdgcn.workgroup.id.z() #1
declare i64 @llvm.amdgcn.dispatch.id() #1
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #1

; Attribute groups:
;   #0 - attached to @max_9_sgprs (requests an SGPR budget of 14).
;   #1 - attached to the intrinsic declarations.
;   #2 - referenced only by the commented-out kernels (budget of 12).
;   #3 - not referenced by anything visible in this file (budget of 11).
attributes #0 = { nounwind "amdgpu-num-sgpr"="14" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "amdgpu-num-sgpr"="12" }
attributes #3 = { nounwind "amdgpu-num-sgpr"="11" }
AMDGPU/SI: Add back reverted SGPR spilling code, but disable it suggested as a better solution by Matt llvm-svn: 287942 2016-11-26 01:37:09 +08:00			`; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s \| FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s`
			`; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s \| FileCheck -check-prefix=TOSMEM -check-prefix=ALL %s`
[AMDGPU] Wave and register controls - Implemented amdgpu-flat-work-group-size attribute - Implemented amdgpu-num-active-waves-per-eu attribute - Implemented amdgpu-num-sgpr attribute - Implemented amdgpu-num-vgpr attribute - Dynamic LDS constraints are in a separate patch Patch by Tom Stellard and Konstantin Zhuravlyov Differential Revision: https://reviews.llvm.org/D21562 llvm-svn: 280747 2016-09-07 04:22:28 +08:00
AMDGPU/SI: Add back reverted SGPR spilling code, but disable it suggested as a better solution by Matt llvm-svn: 287942 2016-11-26 01:37:09 +08:00			`; If spilling to smem, additional registers are used for the resource`
			`; descriptor.`

[AMDGPU] Fix typo in GCNSchedStrategy Differential revision: https://reviews.llvm.org/D28980 llvm-svn: 293171 2017-01-26 18:51:47 +08:00			`; ALL-LABEL: {{^}}max_9_sgprs:`
AMDGPU: Fix using incorrect private resource with no allocation It's possible to have a use of the private resource descriptor or scratch wave offset registers even though there are no allocated stack objects. This would result in continuing to use the maximum number reserved registers. This could go over the number of SGPRs available on VI, or violate the SGPR limit requested by the function attributes. llvm-svn: 285435 2016-10-29 03:43:31 +08:00
AMDGPU/SI: Add back reverted SGPR spilling code, but disable it suggested as a better solution by Matt llvm-svn: 287942 2016-11-26 01:37:09 +08:00			`; ALL: SGPRBlocks: 1`
[AMDGPU] Fix typo in GCNSchedStrategy Differential revision: https://reviews.llvm.org/D28980 llvm-svn: 293171 2017-01-26 18:51:47 +08:00			`; ALL: NumSGPRsForWavesPerEU: 9`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @max_9_sgprs(i32 addrspace(1)* %out1,`
AMDGPU/SI: Add back reverted SGPR spilling code, but disable it suggested as a better solution by Matt llvm-svn: 287942 2016-11-26 01:37:09 +08:00
[AMDGPU] Wave and register controls - Implemented amdgpu-flat-work-group-size attribute - Implemented amdgpu-num-active-waves-per-eu attribute - Implemented amdgpu-num-sgpr attribute - Implemented amdgpu-num-vgpr attribute - Dynamic LDS constraints are in a separate patch Patch by Tom Stellard and Konstantin Zhuravlyov Differential Revision: https://reviews.llvm.org/D21562 llvm-svn: 280747 2016-09-07 04:22:28 +08:00			`i32 addrspace(1)* %out2,`
			`i32 addrspace(1)* %out3,`
			`i32 addrspace(1)* %out4,`
[AMDGPU] Revert failed scheduling This patch reverts region's scheduling to the original untouched state in case if we have have decreased occupancy. In addition it switches to use TargetRegisterInfo occupancy callback for pressure limits instead of gradually increasing limits which were just passed by. We are going to stay with the best schedule so we do not need to tolerate worsened scheduling anymore. Differential Revision: https://reviews.llvm.org/D29971 llvm-svn: 295206 2017-02-16 01:19:50 +08:00			`i32 addrspace(1)* %out5,`
			`i32 %one, i32 %two, i32 %three, i32 %four, i32 %five) #0 {`
[AMDGPU] Wave and register controls - Implemented amdgpu-flat-work-group-size attribute - Implemented amdgpu-num-active-waves-per-eu attribute - Implemented amdgpu-num-sgpr attribute - Implemented amdgpu-num-vgpr attribute - Dynamic LDS constraints are in a separate patch Patch by Tom Stellard and Konstantin Zhuravlyov Differential Revision: https://reviews.llvm.org/D21562 llvm-svn: 280747 2016-09-07 04:22:28 +08:00			`store i32 %one, i32 addrspace(1)* %out1`
			`store i32 %two, i32 addrspace(1)* %out2`
			`store i32 %three, i32 addrspace(1)* %out3`
			`store i32 %four, i32 addrspace(1)* %out4`
[AMDGPU] Revert failed scheduling This patch reverts region's scheduling to the original untouched state in case if we have have decreased occupancy. In addition it switches to use TargetRegisterInfo occupancy callback for pressure limits instead of gradually increasing limits which were just passed by. We are going to stay with the best schedule so we do not need to tolerate worsened scheduling anymore. Differential Revision: https://reviews.llvm.org/D29971 llvm-svn: 295206 2017-02-16 01:19:50 +08:00			`store i32 %five, i32 addrspace(1)* %out5`
[AMDGPU] Wave and register controls - Implemented amdgpu-flat-work-group-size attribute - Implemented amdgpu-num-active-waves-per-eu attribute - Implemented amdgpu-num-sgpr attribute - Implemented amdgpu-num-vgpr attribute - Dynamic LDS constraints are in a separate patch Patch by Tom Stellard and Konstantin Zhuravlyov Differential Revision: https://reviews.llvm.org/D21562 llvm-svn: 280747 2016-09-07 04:22:28 +08:00			`ret void`
			`}`
AMDGPU: Fix using incorrect private resource with no allocation It's possible to have a use of the private resource descriptor or scratch wave offset registers even though there are no allocated stack objects. This would result in continuing to use the maximum number reserved registers. This could go over the number of SGPRs available on VI, or violate the SGPR limit requested by the function attributes. llvm-svn: 285435 2016-10-29 03:43:31 +08:00
			`; private resource: 4`
			`; scratch wave offset: 1`
			`; workgroup ids: 3`
			`; dispatch id: 2`
			`; queue ptr: 2`
			`; flat scratch init: 2`
			`; ---------------------`
			`; total: 14`

AMDGPU/SI: Don't reserve FLAT_SCR on non-HSA targets & without stack objects Summary: This frees 2 scalar registers. Reviewers: tstellarAMD Subscribers: qcolombet, arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye Differential Revision: https://reviews.llvm.org/D27150 llvm-svn: 289261 2016-12-10 03:49:48 +08:00			`; + reserved vcc = 16`
AMDGPU: Fix using incorrect private resource with no allocation It's possible to have a use of the private resource descriptor or scratch wave offset registers even though there are no allocated stack objects. This would result in continuing to use the maximum number reserved registers. This could go over the number of SGPRs available on VI, or violate the SGPR limit requested by the function attributes. llvm-svn: 285435 2016-10-29 03:43:31 +08:00
			`; Because we can't handle re-using the last few input registers as the`
			`; special vcc etc. registers (as well as decide to not use the unused`
			`; features when the number of registers is frozen), this ends up using`
			`; more than expected.`

RegScavenging: Add scavengeRegisterBackwards() Re-apply r276044/r279124/r305516. Fixed a problem where we would refuse to place spills as the very first instruciton of a basic block and thus artifically increase pressure (test in test/CodeGen/PowerPC/scavenging.mir:spill_at_begin) This is a variant of scavengeRegister() that works for enterBasicBlockEnd()/backward(). The benefit of the backward mode is that it is not affected by incomplete kill flags. This patch also changes PrologEpilogInserter::doScavengeFrameVirtualRegs() to use the register scavenger in backwards mode. Differential Revision: http://reviews.llvm.org/D21885 llvm-svn: 305625 2017-06-17 10:08:18 +08:00			`; XALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs:`
			`; XTOSGPR: SGPRBlocks: 1`
			`; XTOSGPR: NumSGPRsForWavesPerEU: 16`
AMDGPU: Fix using incorrect private resource with no allocation It's possible to have a use of the private resource descriptor or scratch wave offset registers even though there are no allocated stack objects. This would result in continuing to use the maximum number reserved registers. This could go over the number of SGPRs available on VI, or violate the SGPR limit requested by the function attributes. llvm-svn: 285435 2016-10-29 03:43:31 +08:00
RegScavenging: Add scavengeRegisterBackwards() Re-apply r276044/r279124/r305516. Fixed a problem where we would refuse to place spills as the very first instruciton of a basic block and thus artifically increase pressure (test in test/CodeGen/PowerPC/scavenging.mir:spill_at_begin) This is a variant of scavengeRegister() that works for enterBasicBlockEnd()/backward(). The benefit of the backward mode is that it is not affected by incomplete kill flags. This patch also changes PrologEpilogInserter::doScavengeFrameVirtualRegs() to use the register scavenger in backwards mode. Differential Revision: http://reviews.llvm.org/D21885 llvm-svn: 305625 2017-06-17 10:08:18 +08:00			`; XTOSMEM: s_mov_b64 s[10:11], s[2:3]`
			`; XTOSMEM: s_mov_b64 s[8:9], s[0:1]`
			`; XTOSMEM: s_mov_b32 s7, s13`
AMDGPU: Fix using incorrect private resource with no allocation It's possible to have a use of the private resource descriptor or scratch wave offset registers even though there are no allocated stack objects. This would result in continuing to use the maximum number reserved registers. This could go over the number of SGPRs available on VI, or violate the SGPR limit requested by the function attributes. llvm-svn: 285435 2016-10-29 03:43:31 +08:00
RegScavenging: Add scavengeRegisterBackwards() Re-apply r276044/r279124/r305516. Fixed a problem where we would refuse to place spills as the very first instruciton of a basic block and thus artifically increase pressure (test in test/CodeGen/PowerPC/scavenging.mir:spill_at_begin) This is a variant of scavengeRegister() that works for enterBasicBlockEnd()/backward(). The benefit of the backward mode is that it is not affected by incomplete kill flags. This patch also changes PrologEpilogInserter::doScavengeFrameVirtualRegs() to use the register scavenger in backwards mode. Differential Revision: http://reviews.llvm.org/D21885 llvm-svn: 305625 2017-06-17 10:08:18 +08:00			`; XTOSMEM: SGPRBlocks: 1`
			`; XTOSMEM: NumSGPRsForWavesPerEU: 16`
			`;`
			`; This test case is disabled: When calculating the spillslot addresses AMDGPU`
			`; creates an extra vreg to save/restore m0 which in a point of maximum register`
			`; pressure would trigger an endless loop; the compiler aborts earlier with`
			`; "Incomplete scavenging after 2nd pass" in practice.`
			`;define amdgpu_kernel void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1,`
			`; i32 addrspace(1)* %out2,`
			`; i32 addrspace(1)* %out3,`
			`; i32 addrspace(1)* %out4,`
			`; i32 %one, i32 %two, i32 %three, i32 %four) #2 {`
			`; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()`
			`; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()`
			`; %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()`
			`; %x.3 = call i64 @llvm.amdgcn.dispatch.id()`
			`; %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()`
			`; %x.5 = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()`
			`; store volatile i32 0, i32* undef`
			`; br label %stores`
			`;`
			`;stores:`
			`; store volatile i32 %x.0, i32 addrspace(1)* undef`
			`; store volatile i32 %x.0, i32 addrspace(1)* undef`
			`; store volatile i32 %x.0, i32 addrspace(1)* undef`
			`; store volatile i64 %x.3, i64 addrspace(1)* undef`
			`; store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef`
			`; store volatile i8 addrspace(2)* %x.5, i8 addrspace(2)* addrspace(1)* undef`
			`;`
			`; store i32 %one, i32 addrspace(1)* %out1`
			`; store i32 %two, i32 addrspace(1)* %out2`
			`; store i32 %three, i32 addrspace(1)* %out3`
			`; store i32 %four, i32 addrspace(1)* %out4`
			`; ret void`
			`;}`
AMDGPU: Fix using incorrect private resource with no allocation It's possible to have a use of the private resource descriptor or scratch wave offset registers even though there are no allocated stack objects. This would result in continuing to use the maximum number reserved registers. This could go over the number of SGPRs available on VI, or violate the SGPR limit requested by the function attributes. llvm-svn: 285435 2016-10-29 03:43:31 +08:00
RegisterCoalscer: Only coalesce complete reserved registers. The coalescer eliminates copies from reserved registers of the form: %vregX = COPY %rY in the case where %rY is a reserved register. However this turns out to be invalid if only some of the subregisters are reserved (see also https://reviews.llvm.org/D26648). Differential Revision: https://reviews.llvm.org/D26687 llvm-svn: 288428 2016-12-02 06:39:51 +08:00			`; The following test is commented out for now; http://llvm.org/PR31230`
			`; XALL-LABEL: max_12_sgprs_12_input_sgprs{{$}}`
AMDGPU: Fix using incorrect private resource with no allocation It's possible to have a use of the private resource descriptor or scratch wave offset registers even though there are no allocated stack objects. This would result in continuing to use the maximum number reserved registers. This could go over the number of SGPRs available on VI, or violate the SGPR limit requested by the function attributes. llvm-svn: 285435 2016-10-29 03:43:31 +08:00			`; ; Make sure copies for input buffer are not clobbered. This requires`
			`; ; swapping the order the registers are copied from what normally`
			`; ; happens.`

RegisterCoalscer: Only coalesce complete reserved registers. The coalescer eliminates copies from reserved registers of the form: %vregX = COPY %rY in the case where %rY is a reserved register. However this turns out to be invalid if only some of the subregisters are reserved (see also https://reviews.llvm.org/D26648). Differential Revision: https://reviews.llvm.org/D26687 llvm-svn: 288428 2016-12-02 06:39:51 +08:00			`; XTOSMEM: s_mov_b32 s5, s11`
			`; XTOSMEM: s_add_u32 m0, s5,`
			`; XTOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0`

			`; XALL: SGPRBlocks: 2`
			`; XALL: NumSGPRsForWavesPerEU: 18`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`;define amdgpu_kernel void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,`
RegisterCoalscer: Only coalesce complete reserved registers. The coalescer eliminates copies from reserved registers of the form: %vregX = COPY %rY in the case where %rY is a reserved register. However this turns out to be invalid if only some of the subregisters are reserved (see also https://reviews.llvm.org/D26648). Differential Revision: https://reviews.llvm.org/D26687 llvm-svn: 288428 2016-12-02 06:39:51 +08:00			`; i32 addrspace(1)* %out2,`
			`; i32 addrspace(1)* %out3,`
			`; i32 addrspace(1)* %out4,`
			`; i32 %one, i32 %two, i32 %three, i32 %four) #2 {`
			`; store volatile i32 0, i32* undef`
			`; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()`
			`; store volatile i32 %x.0, i32 addrspace(1)* undef`
			`; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()`
			`; store volatile i32 %x.0, i32 addrspace(1)* undef`
			`; %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()`
			`; store volatile i32 %x.0, i32 addrspace(1)* undef`
			`; %x.3 = call i64 @llvm.amdgcn.dispatch.id()`
			`; store volatile i64 %x.3, i64 addrspace(1)* undef`
			`; %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()`
			`; store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef`
			`;`
			`; store i32 %one, i32 addrspace(1)* %out1`
			`; store i32 %two, i32 addrspace(1)* %out2`
			`; store i32 %three, i32 addrspace(1)* %out3`
			`; store i32 %four, i32 addrspace(1)* %out4`
			`; ret void`
			`;}`
AMDGPU: Fix using incorrect private resource with no allocation It's possible to have a use of the private resource descriptor or scratch wave offset registers even though there are no allocated stack objects. This would result in continuing to use the maximum number reserved registers. This could go over the number of SGPRs available on VI, or violate the SGPR limit requested by the function attributes. llvm-svn: 285435 2016-10-29 03:43:31 +08:00
			`declare i32 @llvm.amdgcn.workgroup.id.x() #1`
			`declare i32 @llvm.amdgcn.workgroup.id.y() #1`
			`declare i32 @llvm.amdgcn.workgroup.id.z() #1`
			`declare i64 @llvm.amdgcn.dispatch.id() #1`
			`declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1`
			`declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #1`

			`attributes #0 = { nounwind "amdgpu-num-sgpr"="14" }`
			`attributes #1 = { nounwind readnone }`
			`attributes #2 = { nounwind "amdgpu-num-sgpr"="12" }`
			`attributes #3 = { nounwind "amdgpu-num-sgpr"="11" }`