forked from OSchip/llvm-project
GPGPU: Mark kernel functions as polly.skip
Otherwise, Polly-ACC would try to re-optimize the generated kernel functions and might even generate kernels that try to offload themselves. This cannot work, because the GPURuntime is not available on the accelerator, and it would not make any sense in the first place. llvm-svn: 277589
This commit is contained in:
parent
c59b3a2236
commit
629109b633
|
@ -564,7 +564,7 @@ public:
|
|||
/// the function.
|
||||
///
|
||||
/// @param F The function to mark as invalid.
|
||||
void markFunctionAsInvalid(Function *F) const;
|
||||
static void markFunctionAsInvalid(Function *F);
|
||||
|
||||
/// @brief Verify if all valid Regions in this Function are still valid
|
||||
/// after some transformations.
|
||||
|
|
|
@ -1402,7 +1402,7 @@ bool ScopDetection::isValidRegion(DetectionContext &Context) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
void ScopDetection::markFunctionAsInvalid(Function *F) const {
|
||||
void ScopDetection::markFunctionAsInvalid(Function *F) {
|
||||
F->addFnAttr(PollySkipFnAttr);
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "polly/DependenceInfo.h"
|
||||
#include "polly/LinkAllPasses.h"
|
||||
#include "polly/Options.h"
|
||||
#include "polly/ScopDetection.h"
|
||||
#include "polly/ScopInfo.h"
|
||||
#include "polly/Support/SCEVValidator.h"
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
|
@ -1170,6 +1171,8 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel,
|
|||
Builder.CreateRetVoid();
|
||||
Builder.SetInsertPoint(EntryBlock, EntryBlock->begin());
|
||||
|
||||
ScopDetection::markFunctionAsInvalid(FN);
|
||||
|
||||
insertKernelIntrinsics(Kernel);
|
||||
}
|
||||
|
||||
|
|
|
@ -113,7 +113,7 @@
|
|||
; IR: polly.exiting:
|
||||
; IR-NEXT: br label %polly.merge_new_and_old
|
||||
|
||||
; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
|
||||
; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) #0 {
|
||||
; KERNEL-IR-NEXT: entry:
|
||||
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
|
||||
|
@ -171,6 +171,7 @@
|
|||
; KERNEL-IR-LABEL: polly.loop_preheader: ; preds = %entry
|
||||
; KERNEL-IR-NEXT: br label %polly.loop_header
|
||||
|
||||
; KERNEL-IR: attributes #0 = { "polly.skip.fn" }
|
||||
|
||||
; KERNEL-ASM: .version 3.2
|
||||
; KERNEL-ASM-NEXT: .target sm_30
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98
|
||||
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
|
||||
|
||||
; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0) {
|
||||
; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0)
|
||||
; KERNEL-IR-LABEL: entry:
|
||||
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
|
||||
; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A)
|
||||
; KERNEL-NEXT: entry:
|
||||
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
|
||||
|
@ -36,7 +36,7 @@
|
|||
; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B) {
|
||||
; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B)
|
||||
; KERNEL-NEXT: entry:
|
||||
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
; A[i] += 42;
|
||||
; }
|
||||
|
||||
; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) {
|
||||
; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n)
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
|
|
Loading…
Reference in New Issue