GPGPU: Mark kernel functions as polly.skip

Otherwise, we would try to re-optimize them with Polly-ACC and possibly even
generate kernels that try to offload themselves. This cannot work, because the
GPURuntime is not available on the accelerator, and it would not make any
sense either.

llvm-svn: 277589
This commit is contained in:
Tobias Grosser 2016-08-03 12:00:07 +00:00
parent c59b3a2236
commit 629109b633
7 changed files with 11 additions and 7 deletions

View File

@ -564,7 +564,7 @@ public:
/// the function.
///
/// @param F The function to mark as invalid.
void markFunctionAsInvalid(Function *F) const;
static void markFunctionAsInvalid(Function *F);
/// @brief Verify if all valid Regions in this Function are still valid
/// after some transformations.

View File

@ -1402,7 +1402,7 @@ bool ScopDetection::isValidRegion(DetectionContext &Context) const {
return true;
}
void ScopDetection::markFunctionAsInvalid(Function *F) const {
void ScopDetection::markFunctionAsInvalid(Function *F) {
F->addFnAttr(PollySkipFnAttr);
}

View File

@ -17,6 +17,7 @@
#include "polly/DependenceInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/Support/SCEVValidator.h"
#include "llvm/ADT/PostOrderIterator.h"
@ -1170,6 +1171,8 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel,
Builder.CreateRetVoid();
Builder.SetInsertPoint(EntryBlock, EntryBlock->begin());
ScopDetection::markFunctionAsInvalid(FN);
insertKernelIntrinsics(Kernel);
}

View File

@ -113,7 +113,7 @@
; IR: polly.exiting:
; IR-NEXT: br label %polly.merge_new_and_old
; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) #0 {
; KERNEL-IR-NEXT: entry:
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
@ -171,6 +171,7 @@
; KERNEL-IR-LABEL: polly.loop_preheader: ; preds = %entry
; KERNEL-IR-NEXT: br label %polly.loop_header
; KERNEL-IR: attributes #0 = { "polly.skip.fn" }
; KERNEL-ASM: .version 3.2
; KERNEL-ASM-NEXT: .target sm_30

View File

@ -42,7 +42,7 @@
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0) {
; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0)
; KERNEL-IR-LABEL: entry:
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64

View File

@ -21,7 +21,7 @@
; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
@ -36,7 +36,7 @@
; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B) {
; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64

View File

@ -9,7 +9,7 @@
; A[i] += 42;
; }
; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) {
; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n)
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"