llvm-project/llvm/test/CodeGen/NVPTX/nounroll.ll

; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s

; Module-level target configuration: an explicit data layout string and a
; 64-bit NVPTX triple with unknown vendor and OS components.
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-unknown-unknown"

; Compiled from the following CUDA code:
;
;   #pragma nounroll
;   for (int i = 0; i < 2; ++i)
;     output[i] = input[i];
define void @nounroll(float* %input, float* %output) {
; Test body: a two-iteration copy loop whose back-edge branch carries !llvm.loop
; metadata (!0). The FileCheck patterns below are matched in order against the
; generated PTX: function label, then the nounroll pragma, then one f32 load,
; then one f32 store, and finally no further f32 load/store after that store.
; CHECK-LABEL: .visible .func nounroll(
entry:
  br label %for.body

for.body:
; The pragma must appear in the output before the loop's load is matched.
; CHECK: .pragma "nounroll"
  ; Induction variable: starts at 0 from %entry, takes %inc on the back edge.
  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  ; Sign-extend the 32-bit index once; it is reused for both element addresses.
  %idxprom = sext i32 %i.06 to i64
  ; Read input[i].
  %arrayidx = getelementptr inbounds float* %input, i64 %idxprom
  %0 = load float* %arrayidx, align 4
; CHECK: ld.f32
  ; Write the loaded value to output[i].
  %arrayidx2 = getelementptr inbounds float* %output, i64 %idxprom
  store float %0, float* %arrayidx2, align 4
; CHECK: st.f32
  ; Advance the index and leave the loop once the incremented value equals 2.
  %inc = add nuw nsw i32 %i.06, 1
  %exitcond = icmp eq i32 %inc, 2
  ; Back edge of the loop; the attached !0 metadata is what marks the loop.
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
; The two negative patterns below reject any additional f32 load or store in
; the output that follows the store matched above (a second copy of the loop
; body would produce them).
; CHECK-NOT: ld.f32
; CHECK-NOT: st.f32

for.end:
  ret void
}

; Loop metadata referenced by the !llvm.loop attachment on the loop's back-edge
; branch. !0 is a distinct node whose first operand refers to itself and whose
; second operand is !1, the "llvm.loop.unroll.disable" hint.
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.unroll.disable"}
[NVPTX] Emit .pragma "nounroll" for loops marked with nounroll Summary: The CUDA driver can unroll loops when JIT-compiling PTX. To prevent the CUDA driver from unrolling a loop marked with llvm.loop.unroll.disable, we need to emit .pragma "nounroll" at the header of that loop. This patch also extracts the code that gets unroll metadata from loop ID metadata into a shared helper function. Test Plan: test/CodeGen/NVPTX/nounroll.ll Reviewers: eliben, meheff, jholewinski Reviewed By: jholewinski Subscribers: jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D7041 llvm-svn: 227703 2015-02-01 10:27:45 +08:00			`; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 \| FileCheck %s`

			`target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"`
			`target triple = "nvptx64-unknown-unknown"`

			`; Compiled from the following CUDA code:`
			`;`
			`; #pragma nounroll`
			`; for (int i = 0; i < 2; ++i)`
			`; output[i] = input[i];`
			`define void @nounroll(float* %input, float* %output) {`
			`; CHECK-LABEL: .visible .func nounroll(`
			`entry:`
			`br label %for.body`

			`for.body:`
			`; CHECK: .pragma "nounroll"`
			`%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]`
			`%idxprom = sext i32 %i.06 to i64`
			`%arrayidx = getelementptr inbounds float* %input, i64 %idxprom`
			`%0 = load float* %arrayidx, align 4`
			`; CHECK: ld.f32`
			`%arrayidx2 = getelementptr inbounds float* %output, i64 %idxprom`
			`store float %0, float* %arrayidx2, align 4`
			`; CHECK: st.f32`
			`%inc = add nuw nsw i32 %i.06, 1`
			`%exitcond = icmp eq i32 %inc, 2`
			`br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0`
			`; CHECK-NOT: ld.f32`
			`; CHECK-NOT: st.f32`

			`for.end:`
			`ret void`
			`}`

			`!0 = distinct !{!0, !1}`
			`!1 = !{!"llvm.loop.unroll.disable"}`