llvm-project/llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll

; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s

; Inline spiller can decide to move a spill as early as possible in the basic block.
; It will skip phis and label, but we also need to make sure it skips instructions
; in the basic block prologue which restore exec mask.
; Make sure instruction to restore exec mask immediately follows label

; CHECK-LABEL: {{^}}spill_cfg_position:
; CHECK: s_cbranch_execz [[LABEL1:BB[0-9_]+]]
; CHECK: {{^}}[[LABEL1]]:
; CHECK: s_cbranch_execz [[LABEL2:BB[0-9_]+]]
; CHECK: {{^}}[[LABEL2]]:
; CHECK-NEXT: s_or_b64 exec
; CHECK: buffer_

define amdgpu_kernel void @spill_cfg_position(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tmp14 = load i32, i32 addrspace(1)* %arg, align 4
  %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
  %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  %tmp18 = load i32, i32 addrspace(1)* %tmp17, align 4
  %tmp19 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
  %tmp20 = load i32, i32 addrspace(1)* %tmp19, align 4
  %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4
  %tmp22 = load i32, i32 addrspace(1)* %tmp21, align 4
  %tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 5
  %tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4
  %tmp25 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 6
  %tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4
  %tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 7
  %tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4
  %tmp29 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8
  %tmp30 = load i32, i32 addrspace(1)* %tmp29, align 4
  %tmp33 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp1
  %tmp34 = load i32, i32 addrspace(1)* %tmp33, align 4
  %tmp35 = icmp eq i32 %tmp34, 0
  br i1 %tmp35, label %bb44, label %bb36

bb36:                                             ; preds = %bb
  %tmp37 = mul nsw i32 %tmp20, %tmp18
  %tmp38 = add nsw i32 %tmp37, %tmp16
  %tmp39 = mul nsw i32 %tmp24, %tmp22
  %tmp40 = add nsw i32 %tmp38, %tmp39
  %tmp41 = mul nsw i32 %tmp28, %tmp26
  %tmp42 = add nsw i32 %tmp40, %tmp41
  %tmp43 = add nsw i32 %tmp42, %tmp30
  br label %bb52

bb44:                                             ; preds = %bb
  %tmp45 = mul nsw i32 %tmp18, %tmp16
  %tmp46 = mul nsw i32 %tmp22, %tmp20
  %tmp47 = add nsw i32 %tmp46, %tmp45
  %tmp48 = mul nsw i32 %tmp26, %tmp24
  %tmp49 = add nsw i32 %tmp47, %tmp48
  %tmp50 = mul nsw i32 %tmp30, %tmp28
  %tmp51 = add nsw i32 %tmp49, %tmp50
  br label %bb52

bb52:                                             ; preds = %bb44, %bb36
  %tmp53 = phi i32 [ %tmp43, %bb36 ], [ %tmp51, %bb44 ]
  %tmp54 = mul nsw i32 %tmp16, %tmp14
  %tmp55 = mul nsw i32 %tmp22, %tmp18
  %tmp56 = mul nsw i32 %tmp24, %tmp20
  %tmp57 = mul nsw i32 %tmp30, %tmp26
  %tmp58 = add i32 %tmp55, %tmp54
  %tmp59 = add i32 %tmp58, %tmp56
  %tmp60 = add i32 %tmp59, %tmp28
  %tmp61 = add i32 %tmp60, %tmp57
  %tmp62 = add i32 %tmp61, %tmp53
  store i32 %tmp62, i32 addrspace(1)* %tmp33, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
AMDGPU: Assume xnack is enabled by default This is the conservatively correct default. It is always safe to assume xnack is enabled, but not the converse. Introduce a feature to blacklist targets where xnack can never be meaningfully enabled. I'm not sure the targets this is applied to is 100% correct. llvm-svn: 360903 2019-05-16 22:48:34 +08:00			`; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -stress-regalloc=6 < %s \| FileCheck %s`
[AMDGPU] Prevent spills before exec mask is restored Inline spiller can decide to move a spill as early as possible in the basic block. It will skip phis and label, but we also need to make sure it skips instructions in the basic block prologue which restore exec mask. Added isPositionLike callback in TargetInstrInfo to detect instructions which shall be skipped in addition to common phis, labels etc. Differential Revision: https://reviews.llvm.org/D27997 llvm-svn: 292554 2017-01-20 08:44:31 +08:00
			`; Inline spiller can decide to move a spill as early as possible in the basic block.`
			`; It will skip phis and label, but we also need to make sure it skips instructions`
			`; in the basic block prologue which restore exec mask.`
			`; Make sure instruction to restore exec mask immediately follows label`

			`; CHECK-LABEL: {{^}}spill_cfg_position:`
			`; CHECK: s_cbranch_execz [[LABEL1:BB[0-9_]+]]`
			`; CHECK: {{^}}[[LABEL1]]:`
			`; CHECK: s_cbranch_execz [[LABEL2:BB[0-9_]+]]`
			`; CHECK: {{^}}[[LABEL2]]:`
			`; CHECK-NEXT: s_or_b64 exec`
			`; CHECK: buffer_`

AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @spill_cfg_position(i32 addrspace(1)* nocapture %arg) {`
[AMDGPU] Prevent spills before exec mask is restored Inline spiller can decide to move a spill as early as possible in the basic block. It will skip phis and label, but we also need to make sure it skips instructions in the basic block prologue which restore exec mask. Added isPositionLike callback in TargetInstrInfo to detect instructions which shall be skipped in addition to common phis, labels etc. Differential Revision: https://reviews.llvm.org/D27997 llvm-svn: 292554 2017-01-20 08:44:31 +08:00			`bb:`
			`%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #0`
			`%tmp14 = load i32, i32 addrspace(1)* %arg, align 4`
			`%tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1`
			`%tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4`
			`%tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2`
			`%tmp18 = load i32, i32 addrspace(1)* %tmp17, align 4`
			`%tmp19 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3`
			`%tmp20 = load i32, i32 addrspace(1)* %tmp19, align 4`
			`%tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4`
			`%tmp22 = load i32, i32 addrspace(1)* %tmp21, align 4`
			`%tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 5`
			`%tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4`
			`%tmp25 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 6`
			`%tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4`
			`%tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 7`
			`%tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4`
			`%tmp29 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8`
			`%tmp30 = load i32, i32 addrspace(1)* %tmp29, align 4`
			`%tmp33 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp1`
			`%tmp34 = load i32, i32 addrspace(1)* %tmp33, align 4`
			`%tmp35 = icmp eq i32 %tmp34, 0`
			`br i1 %tmp35, label %bb44, label %bb36`

			`bb36: ; preds = %bb`
			`%tmp37 = mul nsw i32 %tmp20, %tmp18`
			`%tmp38 = add nsw i32 %tmp37, %tmp16`
			`%tmp39 = mul nsw i32 %tmp24, %tmp22`
			`%tmp40 = add nsw i32 %tmp38, %tmp39`
			`%tmp41 = mul nsw i32 %tmp28, %tmp26`
			`%tmp42 = add nsw i32 %tmp40, %tmp41`
			`%tmp43 = add nsw i32 %tmp42, %tmp30`
			`br label %bb52`

			`bb44: ; preds = %bb`
			`%tmp45 = mul nsw i32 %tmp18, %tmp16`
			`%tmp46 = mul nsw i32 %tmp22, %tmp20`
			`%tmp47 = add nsw i32 %tmp46, %tmp45`
			`%tmp48 = mul nsw i32 %tmp26, %tmp24`
			`%tmp49 = add nsw i32 %tmp47, %tmp48`
			`%tmp50 = mul nsw i32 %tmp30, %tmp28`
			`%tmp51 = add nsw i32 %tmp49, %tmp50`
			`br label %bb52`

			`bb52: ; preds = %bb44, %bb36`
			`%tmp53 = phi i32 [ %tmp43, %bb36 ], [ %tmp51, %bb44 ]`
			`%tmp54 = mul nsw i32 %tmp16, %tmp14`
			`%tmp55 = mul nsw i32 %tmp22, %tmp18`
			`%tmp56 = mul nsw i32 %tmp24, %tmp20`
			`%tmp57 = mul nsw i32 %tmp30, %tmp26`
			`%tmp58 = add i32 %tmp55, %tmp54`
			`%tmp59 = add i32 %tmp58, %tmp56`
			`%tmp60 = add i32 %tmp59, %tmp28`
			`%tmp61 = add i32 %tmp60, %tmp57`
			`%tmp62 = add i32 %tmp61, %tmp53`
			`store i32 %tmp62, i32 addrspace(1)* %tmp33, align 4`
			`ret void`
			`}`

			`declare i32 @llvm.amdgcn.workitem.id.x() #0`

			`attributes #0 = { nounwind readnone }`