2016-08-09 23:35:06 +08:00
|
|
|
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
|
|
|
|
; RUN: -disable-output < %s | \
|
|
|
|
; RUN: FileCheck -check-prefix=CODE %s
|
|
|
|
|
|
|
|
; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
|
|
|
|
; RUN: FileCheck %s -check-prefix=IR
|
|
|
|
|
|
|
|
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
|
|
|
|
; RUN: -disable-output < %s | \
|
|
|
|
; RUN: FileCheck %s -check-prefix=KERNEL-IR
|
|
|
|
|
|
|
|
; REQUIRES: pollyacc
|
|
|
|
|
2017-07-06 21:42:42 +08:00
|
|
|
; Approximate C source:
|
|
|
|
; void kernel_dynprog(int c[50]) {
|
|
|
|
; int iter = 0;
|
|
|
|
; int outl = 0;
|
|
|
|
;
|
|
|
|
; while(1) {
|
|
|
|
; for(int indvar = 1 ; indvar <= 49; indvar++) {
|
|
|
|
; c[indvar] = undef;
|
|
|
|
; }
|
|
|
|
; add78 = c[49] + outl;
|
|
|
|
; inc80 = iter + 1;
|
|
|
|
;
|
|
|
|
; if (true) break;
|
|
|
|
;
|
|
|
|
; outl = add78;
|
|
|
|
; iter = inc80;
|
|
|
|
; }
|
|
|
|
;}
|
2016-08-09 23:35:06 +08:00
|
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
|
|
|
|
; CODE: # host
|
|
|
|
; CODE-NEXT: {
|
|
|
|
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_out_l_055__phi, &MemRef_out_l_055__phi, sizeof(i32), cudaMemcpyHostToDevice));
|
|
|
|
; CODE-NEXT: {
|
|
|
|
; CODE-NEXT: dim3 k0_dimBlock(32);
|
|
|
|
; CODE-NEXT: dim3 k0_dimGrid(2);
|
|
|
|
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_out_l_055__phi, dev_MemRef_out_l_055, dev_MemRef_c);
|
|
|
|
; CODE-NEXT: cudaCheckKernel();
|
|
|
|
; CODE-NEXT: }
|
|
|
|
|
2017-07-06 21:42:42 +08:00
|
|
|
; CODE: cudaCheckReturn(cudaMemcpy(MemRef_c, dev_MemRef_c, (50) * sizeof(i32), cudaMemcpyDeviceToHost));
|
2016-08-09 23:35:06 +08:00
|
|
|
; CODE-NEXT: }
|
|
|
|
|
|
|
|
; CODE: # kernel0
|
|
|
|
; CODE-NEXT: if (32 * b0 + t0 <= 48) {
|
|
|
|
; CODE-NEXT: if (b0 == 1 && t0 == 16)
|
|
|
|
; CODE-NEXT: Stmt_for_cond1_preheader(0);
|
|
|
|
; CODE-NEXT: Stmt_for_body17(0, 32 * b0 + t0);
|
|
|
|
; CODE-NEXT: if (b0 == 1 && t0 == 16)
|
|
|
|
; CODE-NEXT: Stmt_for_cond15_for_cond12_loopexit_crit_edge(0);
|
|
|
|
; CODE-NEXT: }
|
|
|
|
|
2016-09-15 22:05:58 +08:00
|
|
|
; IR: [[REGA:%.+]] = bitcast i32* %out_l.055.phiops to i8*
|
|
|
|
; IR-NEXT: call void @polly_copyFromHostToDevice(i8* [[REGA]], i8* %p_dev_array_MemRef_out_l_055__phi, i64 4)
|
2016-08-09 23:35:06 +08:00
|
|
|
|
2017-07-06 21:42:42 +08:00
|
|
|
; IR: [[REGC:%.+]] = bitcast i32* %38 to i8*
|
|
|
|
; IR-NEXT: call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_c, i8* [[REGC]], i64 196)
|
2016-08-09 23:35:06 +08:00
|
|
|
|
|
|
|
; KERNEL-IR: entry:
|
|
|
|
; KERNEL-IR-NEXT: %out_l.055.s2a = alloca i32
|
|
|
|
; KERNEL-IR-NEXT: %out_l.055.phiops = alloca i32
|
[PPCGCodeGeneration] Update PPCG Code Generation for OpenCL compatibility
Added a small change to the way pointer arguments are set in the kernel
code generation. The way the pointer is retrieved now, specifically requests
global address space to be annotated. This is necessary, if the IR should be
run through NVPTX to generate OpenCL compatible PTX.
The changes do not affect the PTX Strings generated for the CUDA target
(nvptx64-nvidia-cuda), but are necessary for OpenCL (nvptx64-nvidia-nvcl).
Additionally, the data layout has been updated to what the NVPTX Backend requests/recommends.
Contributed-by: Philipp Schaad
Reviewers: Meinersbur, grosser, bollu
Reviewed By: grosser, bollu
Subscribers: jlebar, pollydev, llvm-commits, nemanjai, yaxunl, Anastasia
Tags: #polly
Differential Revision: https://reviews.llvm.org/D32215
llvm-svn: 301299
2017-04-25 16:08:29 +08:00
|
|
|
; KERNEL-IR-NEXT: %1 = addrspacecast i8 addrspace(1)* %MemRef_out_l_055__phi to i32*
|
2016-08-09 23:35:06 +08:00
|
|
|
; KERNEL-IR-NEXT: %2 = load i32, i32* %1
|
|
|
|
; KERNEL-IR-NEXT: store i32 %2, i32* %out_l.055.phiops
|
[PPCGCodeGeneration] Update PPCG Code Generation for OpenCL compatibility
Added a small change to the way pointer arguments are set in the kernel
code generation. The way the pointer is retrieved now, specifically requests
global address space to be annotated. This is necessary, if the IR should be
run through NVPTX to generate OpenCL compatible PTX.
The changes do not affect the PTX Strings generated for the CUDA target
(nvptx64-nvidia-cuda), but are necessary for OpenCL (nvptx64-nvidia-nvcl).
Additionally, the data layout has been updated to what the NVPTX Backend requests/recommends.
Contributed-by: Philipp Schaad
Reviewers: Meinersbur, grosser, bollu
Reviewed By: grosser, bollu
Subscribers: jlebar, pollydev, llvm-commits, nemanjai, yaxunl, Anastasia
Tags: #polly
Differential Revision: https://reviews.llvm.org/D32215
llvm-svn: 301299
2017-04-25 16:08:29 +08:00
|
|
|
; KERNEL-IR-NEXT: %3 = addrspacecast i8 addrspace(1)* %MemRef_out_l_055 to i32*
|
2016-08-09 23:35:06 +08:00
|
|
|
; KERNEL-IR-NEXT: %4 = load i32, i32* %3
|
|
|
|
; KERNEL-IR-NEXT: store i32 %4, i32* %out_l.055.s2a
|
|
|
|
|
|
|
|
|
|
|
|
define void @kernel_dynprog([50 x i32]* %c) {
|
|
|
|
entry:
|
|
|
|
%arrayidx77 = getelementptr inbounds [50 x i32], [50 x i32]* %c, i64 0, i64 49
|
|
|
|
br label %for.cond1.preheader
|
|
|
|
|
|
|
|
for.cond1.preheader: ; preds = %for.cond15.for.cond12.loopexit_crit_edge, %entry
|
|
|
|
%out_l.055 = phi i32 [ 0, %entry ], [ %add78, %for.cond15.for.cond12.loopexit_crit_edge ]
|
|
|
|
%iter.054 = phi i32 [ 0, %entry ], [ %inc80, %for.cond15.for.cond12.loopexit_crit_edge ]
|
|
|
|
br label %for.body17
|
|
|
|
|
|
|
|
for.cond15.for.cond12.loopexit_crit_edge: ; preds = %for.body17
|
|
|
|
%tmp = load i32, i32* %arrayidx77, align 4
|
|
|
|
%add78 = add nsw i32 %tmp, %out_l.055
|
|
|
|
%inc80 = add nuw nsw i32 %iter.054, 1
|
|
|
|
br i1 false, label %for.cond1.preheader, label %for.end81
|
|
|
|
|
|
|
|
for.body17: ; preds = %for.body17, %for.cond1.preheader
|
|
|
|
%indvars.iv71 = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next72, %for.body17 ]
|
|
|
|
%arrayidx69 = getelementptr inbounds [50 x i32], [50 x i32]* %c, i64 0, i64 %indvars.iv71
|
|
|
|
store i32 undef, i32* %arrayidx69, align 4
|
|
|
|
%indvars.iv.next72 = add nuw nsw i64 %indvars.iv71, 1
|
|
|
|
%lftr.wideiv74 = trunc i64 %indvars.iv.next72 to i32
|
|
|
|
%exitcond75 = icmp ne i32 %lftr.wideiv74, 50
|
|
|
|
br i1 %exitcond75, label %for.body17, label %for.cond15.for.cond12.loopexit_crit_edge
|
|
|
|
|
|
|
|
for.end81: ; preds = %for.cond15.for.cond12.loopexit_crit_edge
|
|
|
|
ret void
|
|
|
|
}
|