GPGPU: Mark kernel functions as polly.skip

Otherwise, we would try to re-optimize them with Polly-ACC and possibly even generate kernels that try to offload themselves, which does not work as the GPURuntime is not available on the accelerator and also does not make any sense. llvm-svn: 277589
2016-08-03 12:00:07 +00:00 · 2016-08-03 12:00:07 +00:00 · 629109b633
parent c59b3a2236
commit 629109b633
7 changed files with 11 additions and 7 deletions
--- a/polly/include/polly/ScopDetection.h
+++ b/polly/include/polly/ScopDetection.h
@ -564,7 +564,7 @@ public:
  ///        the function.
  ///
  /// @param F The function to mark as invalid.
-  void markFunctionAsInvalid(Function *F) const;
+  static void markFunctionAsInvalid(Function *F);

  /// @brief Verify if all valid Regions in this Function are still valid
  /// after some transformations.
--- a/polly/lib/Analysis/ScopDetection.cpp
+++ b/polly/lib/Analysis/ScopDetection.cpp
@ -1402,7 +1402,7 @@ bool ScopDetection::isValidRegion(DetectionContext &Context) const {
  return true;
 }

-void ScopDetection::markFunctionAsInvalid(Function *F) const {
+void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
 }

--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@ -17,6 +17,7 @@
 #include "polly/DependenceInfo.h"
 #include "polly/LinkAllPasses.h"
 #include "polly/Options.h"
+#include "polly/ScopDetection.h"
 #include "polly/ScopInfo.h"
 #include "polly/Support/SCEVValidator.h"
 #include "llvm/ADT/PostOrderIterator.h"
@ -1170,6 +1171,8 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel,
  Builder.CreateRetVoid();
  Builder.SetInsertPoint(EntryBlock, EntryBlock->begin());

+  ScopDetection::markFunctionAsInvalid(FN);
+
  insertKernelIntrinsics(Kernel);
 }

--- a/polly/test/GPGPU/double-parallel-loop.ll
+++ b/polly/test/GPGPU/double-parallel-loop.ll
@ -113,7 +113,7 @@
 ; IR: polly.exiting:
 ; IR-NEXT:    br label %polly.merge_new_and_old

-; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
+; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) #0 {
 ; KERNEL-IR-NEXT: entry:
 ; KERNEL-IR-NEXT:   %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
 ; KERNEL-IR-NEXT:   %b0 = zext i32 %0 to i64
@ -171,6 +171,7 @@
 ; KERNEL-IR-LABEL: polly.loop_preheader:                             ; preds = %entry
 ; KERNEL-IR-NEXT:   br label %polly.loop_header

+; KERNEL-IR: attributes #0 = { "polly.skip.fn" }

 ; KERNEL-ASM: .version 3.2
 ; KERNEL-ASM-NEXT: .target sm_30
--- a/polly/test/GPGPU/host-control-flow.ll
+++ b/polly/test/GPGPU/host-control-flow.ll
@ -42,7 +42,7 @@
 ; IR-NEXT:   %polly.loop_cond = icmp sle i64 %polly.indvar, 98
 ; IR-NEXT:   br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit

-; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0) {
+; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0)
 ; KERNEL-IR-LABEL: entry:
 ; KERNEL-IR-NEXT:   %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
 ; KERNEL-IR-NEXT:   %b0 = zext i32 %0 to i64
--- a/polly/test/GPGPU/kernel-params-only-some-arrays.ll
+++ b/polly/test/GPGPU/kernel-params-only-some-arrays.ll
@ -21,7 +21,7 @@
 ; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 ; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"

-; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
+; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A)
 ; KERNEL-NEXT:   entry:
 ; KERNEL-NEXT:     %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
 ; KERNEL-NEXT:     %b0 = zext i32 %0 to i64
@ -36,7 +36,7 @@
 ; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 ; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"

-; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B) {
+; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B)
 ; KERNEL-NEXT:   entry:
 ; KERNEL-NEXT:     %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
 ; KERNEL-NEXT:     %b0 = zext i32 %0 to i64
--- a/polly/test/GPGPU/kernel-params-scop-parameter.ll
+++ b/polly/test/GPGPU/kernel-params-scop-parameter.ll
@ -9,7 +9,7 @@
 ;        A[i] += 42;
 ;    }

-; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) {
+; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n)

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"