forked from OSchip/llvm-project
GPGPU: add intrinsic functions to obtain a kernel's thread and block ids
llvm-svn: 275953
This commit is contained in:
parent
32837fe313
commit
472f9654c8
|
@ -97,6 +97,17 @@ private:
|
|||
/// The GPU program we generate code for.
|
||||
gpu_prog *Prog;
|
||||
|
||||
/// Class to free isl_ids.
|
||||
class IslIdDeleter {
|
||||
public:
|
||||
void operator()(__isl_take isl_id *Id) { isl_id_free(Id); };
|
||||
};
|
||||
|
||||
/// A set containing all isl_ids allocated in a GPU kernel.
|
||||
///
|
||||
/// By releasing this set all isl_ids will be freed.
|
||||
std::set<std::unique_ptr<isl_id, IslIdDeleter>> KernelIDs;
|
||||
|
||||
/// Create code for user-defined AST nodes.
|
||||
///
|
||||
/// These AST nodes can be of type:
|
||||
|
@ -137,6 +148,11 @@ private:
|
|||
/// @returns The newly declared function.
|
||||
Function *createKernelFunctionDecl(ppcg_kernel *Kernel);
|
||||
|
||||
/// Insert intrinsic functions to obtain thread and block ids.
|
||||
///
|
||||
/// @param Kernel The kernel to generate the intrinsic functions for.
|
||||
void insertKernelIntrinsics(ppcg_kernel *Kernel);
|
||||
|
||||
/// Finalize the generation of the kernel function.
|
||||
///
|
||||
/// Free the LLVM-IR module corresponding to the kernel and -- if requested --
|
||||
|
@ -172,10 +188,12 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
|
|||
assert(Kernel->tree && "Device AST of kernel node is empty");
|
||||
|
||||
Instruction &HostInsertPoint = *Builder.GetInsertPoint();
|
||||
IslExprBuilder::IDToValueTy HostIDs = IDToValue;
|
||||
|
||||
createKernelFunction(Kernel);
|
||||
|
||||
Builder.SetInsertPoint(&HostInsertPoint);
|
||||
IDToValue = HostIDs;
|
||||
|
||||
finalizeKernelFunction();
|
||||
}
|
||||
|
@ -222,6 +240,35 @@ Function *GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel) {
|
|||
return FN;
|
||||
}
|
||||
|
||||
/// Insert intrinsic calls that read the block and thread ids of a kernel.
///
/// For every id in Kernel->block_ids (Kernel->n_grid entries) and
/// Kernel->thread_ids (Kernel->n_block entries) a call to the matching
/// nvvm_read_ptx_sreg_* intrinsic is emitted at the builder's current insert
/// point. The result is cast (unsigned) to a 64-bit integer that is named
/// after the isl_id and registered in IDToValue under that id.
///
/// @param Kernel The kernel to generate the intrinsic functions for.
void GPUNodeBuilder::insertKernelIntrinsics(ppcg_kernel *Kernel) {
  Intrinsic::ID IntrinsicsBID[] = {Intrinsic::nvvm_read_ptx_sreg_ctaid_x,
                                   Intrinsic::nvvm_read_ptx_sreg_ctaid_y};

  Intrinsic::ID IntrinsicsTID[] = {Intrinsic::nvvm_read_ptx_sreg_tid_x,
                                   Intrinsic::nvvm_read_ptx_sreg_tid_y,
                                   Intrinsic::nvvm_read_ptx_sreg_tid_z};

  // The loops below index the tables above with the kernel's dimension
  // counts, so make sure a kernel never has more dimensions than we have
  // intrinsics for.
  assert(Kernel->n_grid <=
             static_cast<int>(sizeof(IntrinsicsBID) / sizeof(IntrinsicsBID[0])) &&
         "More grid dimensions than block id intrinsics");
  assert(Kernel->n_block <=
             static_cast<int>(sizeof(IntrinsicsTID) / sizeof(IntrinsicsTID[0])) &&
         "More block dimensions than thread id intrinsics");

  // Emit the intrinsic call for a single id and remember its value. The
  // lambda takes ownership of Id (__isl_take) and hands it to KernelIDs, which
  // frees it once the set is cleared.
  auto addId = [this](__isl_take isl_id *Id, Intrinsic::ID Intr) {
    std::string Name = isl_id_get_name(Id);
    Module *M = Builder.GetInsertBlock()->getParent()->getParent();
    Function *IntrinsicFn = Intrinsic::getDeclaration(M, Intr);
    Value *Val = Builder.CreateCall(IntrinsicFn, {});
    // The intrinsic result is widened without sign extension to 64 bit.
    Val = Builder.CreateIntCast(Val, Builder.getInt64Ty(), false, Name);
    IDToValue[Id] = Val;
    KernelIDs.insert(std::unique_ptr<isl_id, IslIdDeleter>(Id));
  };

  for (int i = 0; i < Kernel->n_grid; ++i) {
    isl_id *Id = isl_id_list_get_id(Kernel->block_ids, i);
    addId(Id, IntrinsicsBID[i]);
  }

  for (int i = 0; i < Kernel->n_block; ++i) {
    isl_id *Id = isl_id_list_get_id(Kernel->thread_ids, i);
    addId(Id, IntrinsicsTID[i]);
  }
}
|
||||
|
||||
void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel) {
|
||||
|
||||
std::string Identifier = "kernel_" + std::to_string(Kernel->id);
|
||||
|
@ -236,6 +283,8 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel) {
|
|||
Builder.SetInsertPoint(EntryBlock);
|
||||
Builder.CreateRetVoid();
|
||||
Builder.SetInsertPoint(EntryBlock, EntryBlock->begin());
|
||||
|
||||
insertKernelIntrinsics(Kernel);
|
||||
}
|
||||
|
||||
void GPUNodeBuilder::finalizeKernelFunction() {
|
||||
|
@ -244,6 +293,7 @@ void GPUNodeBuilder::finalizeKernelFunction() {
|
|||
outs() << *GPUModule << "\n";
|
||||
|
||||
GPUModule.release();
|
||||
KernelIDs.clear();
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
|
|
@ -10,6 +10,10 @@
|
|||
; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
|
||||
; RUN: FileCheck %s -check-prefix=IR
|
||||
|
||||
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
|
||||
; RUN: -disable-output < %s | \
|
||||
; RUN: FileCheck %s -check-prefix=KERNEL-IR
|
||||
|
||||
; REQUIRES: pollyacc
|
||||
|
||||
; CHECK: Stmt_bb5
|
||||
|
@ -89,6 +93,19 @@
|
|||
; IR: polly.exiting:
|
||||
; IR-NEXT: br label %polly.merge_new_and_old
|
||||
|
||||
; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
|
||||
; KERNEL-IR-NEXT: entry:
|
||||
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
|
||||
; KERNEL-IR-NEXT: %1 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
|
||||
; KERNEL-IR-NEXT: %b1 = zext i32 %1 to i64
|
||||
; KERNEL-IR-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
; KERNEL-IR-NEXT: %t0 = zext i32 %2 to i64
|
||||
; KERNEL-IR-NEXT: %3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
|
||||
; KERNEL-IR-NEXT: %t1 = zext i32 %3 to i64
|
||||
; KERNEL-IR-NEXT: ret void
|
||||
; KERNEL-IR-NEXT: }
|
||||
|
||||
; void double_parallel_loop(float A[][1024]) {
|
||||
; for (long i = 0; i < 1024; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
|
|
|
@ -17,6 +17,10 @@
|
|||
|
||||
; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
|
||||
; KERNEL-NEXT: entry:
|
||||
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
|
||||
; KERNEL-NEXT: %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
; KERNEL-NEXT: %t0 = zext i32 %1 to i64
|
||||
; KERNEL-NEXT: ret void
|
||||
; KERNEL-NEXT: }
|
||||
|
||||
|
@ -27,6 +31,10 @@
|
|||
|
||||
; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B) {
|
||||
; KERNEL-NEXT: entry:
|
||||
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
|
||||
; KERNEL-NEXT: %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
; KERNEL-NEXT: %t0 = zext i32 %1 to i64
|
||||
; KERNEL-NEXT: ret void
|
||||
; KERNEL-NEXT: }
|
||||
|
||||
|
|
Loading…
Reference in New Issue