; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-spill-sgpr-to-vgpr=true < %s | FileCheck -check-prefixes=GCN,SPILL-TO-VGPR %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-spill-sgpr-to-vgpr=false < %s | FileCheck -check-prefixes=GCN,NO-SPILL-TO-VGPR %s

; Check frame setup where SGPR spills to VGPRs are disabled or enabled.

declare hidden void @external_void_func_void() #0
; GCN-LABEL: {{^}}callee_with_stack_and_call:
; SPILL-TO-VGPR: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; SPILL-TO-VGPR: v_writelane_b32 v40, s33, 2
; SPILL-TO-VGPR: v_writelane_b32 v40, s30, 0
; SPILL-TO-VGPR: v_writelane_b32 v40, s31, 1
; NO-SPILL-TO-VGPR: v_mov_b32_e32 v0, s33
; NO-SPILL-TO-VGPR: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; NO-SPILL-TO-VGPR: s_mov_b64 exec, s[4:5]
; NO-SPILL-TO-VGPR: s_mov_b64 s[6:7], exec
; NO-SPILL-TO-VGPR: s_mov_b64 exec, 3
; NO-SPILL-TO-VGPR: buffer_store_dword v1, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR: v_writelane_b32 v1, s30, 0
; NO-SPILL-TO-VGPR: v_writelane_b32 v1, s31, 1
; NO-SPILL-TO-VGPR: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; NO-SPILL-TO-VGPR: buffer_load_dword v1, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR: s_mov_b64 exec, s[6:7]

; GCN: s_swappc_b64 s[30:31], s[4:5]

; SPILL-TO-VGPR: v_readlane_b32 s4, v40, 0
; SPILL-TO-VGPR: v_readlane_b32 s5, v40, 1
; NO-SPILL-TO-VGPR: v_readlane_b32 s4, v2, 0
; NO-SPILL-TO-VGPR: v_readlane_b32 s5, v2, 1
; SPILL-TO-VGPR: v_readlane_b32 s33, v40, 2
; NO-SPILL-TO-VGPR: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; NO-SPILL-TO-VGPR: v_readfirstlane_b32 s33, v0
define void @callee_with_stack_and_call() #0 {
  ; A stack object forces the function to set up a frame (s33), and the
  ; volatile store keeps the alloca from being optimized away.
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  ; The call forces s30/s31 (return address) and s33 (frame pointer) to be
  ; spilled/restored, which is what the CHECK lines above verify.
  call void @external_void_func_void()
  ret void
}
attributes #0 = { nounwind }