Revert "[Polly] Added OpenCL Runtime to GPURuntime Library for GPGPU CodeGen"

This reverts commit 17a84e414adb51ee375d14836d4c2a817b191933.

Patches should have been submitted in the order of:

1. D32852
2. D32854
3. D32431

I mistakenly pushed D32431 (3) first. Reverting so that the patches can be
pushed in the correct order.

llvm-svn: 302217
This commit is contained in:
Siddharth Bhat 2017-05-05 09:02:08 +00:00
parent 2b0fae877e
commit c1267b9baa
10 changed files with 161 additions and 1433 deletions

View File

@ -152,10 +152,9 @@ SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
option(POLLY_ENABLE_GPGPU_CODEGEN "Enable GPGPU code generation feature" OFF)
if (POLLY_ENABLE_GPGPU_CODEGEN)
# Do not require CUDA/OpenCL, as GPU code generation test cases can be run
# without a CUDA/OpenCL library.
# Do not require CUDA, as GPU code generation test cases can be run without
# a cuda library.
FIND_PACKAGE(CUDA)
FIND_PACKAGE(OpenCL)
set(GPU_CODEGEN TRUE)
else(POLLY_ENABLE_GPGPU_CODEGEN)
set(GPU_CODEGEN FALSE)
@ -164,13 +163,8 @@ endif(POLLY_ENABLE_GPGPU_CODEGEN)
# Support GPGPU code generation if the library is available.
if (CUDALIB_FOUND)
add_definitions(-DHAS_LIBCUDART)
INCLUDE_DIRECTORIES( ${CUDALIB_INCLUDE_DIR} )
endif(CUDALIB_FOUND)
if (OpenCL_FOUND)
add_definitions(-DHAS_LIBOPENCL)
INCLUDE_DIRECTORIES( ${OpenCL_INCLUDE_DIR} )
endif(OpenCL_FOUND)
option(POLLY_BUNDLED_ISL "Use the bundled version of libisl included in Polly" ON)
if (NOT POLLY_BUNDLED_ISL)

View File

@ -1,24 +0,0 @@
//===--- polly/PPCGCodeGeneration.h - Polly Accelerator Code Generation. --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Take a scop created by ScopInfo and map it to GPU code using the ppcg
// GPU mapping strategy.
//
//===----------------------------------------------------------------------===//
#ifndef POLLY_PPCGCODEGENERATION_H
#define POLLY_PPCGCODEGENERATION_H
/// The GPU Architecture to target.
enum GPUArch { NVPTX64 };
/// The GPU Runtime implementation to use.
enum GPURuntime { CUDA, OpenCL };
#endif // POLLY_PPCGCODEGENERATION_H

View File

@ -15,7 +15,6 @@
#ifndef POLLY_LINKALLPASSES_H
#define POLLY_LINKALLPASSES_H
#include "polly/CodeGen/PPCGCodeGeneration.h"
#include "polly/Config/config.h"
#include "polly/PruneUnprofitable.h"
#include "polly/Simplify.h"
@ -49,8 +48,7 @@ llvm::Pass *createScopInfoWrapperPassPass();
llvm::Pass *createIslAstInfoPass();
llvm::Pass *createCodeGenerationPass();
#ifdef GPU_CODEGEN
llvm::Pass *createPPCGCodeGenerationPass(GPUArch Arch = GPUArch::NVPTX64,
GPURuntime Runtime = GPURuntime::CUDA);
llvm::Pass *createPPCGCodeGenerationPass();
#endif
llvm::Pass *createIslScheduleOptimizerPass();
llvm::Pass *createFlattenSchedulePass();

View File

@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/PPCGCodeGeneration.h"
#include "polly/CodeGen/IslAst.h"
#include "polly/CodeGen/IslNodeBuilder.h"
#include "polly/CodeGen/Utils.h"
@ -154,9 +153,9 @@ public:
GPUNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator,
const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE,
DominatorTree &DT, Scop &S, BasicBlock *StartBlock,
gpu_prog *Prog, GPURuntime Runtime, GPUArch Arch)
gpu_prog *Prog)
: IslNodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock),
Prog(Prog), Runtime(Runtime), Arch(Arch) {
Prog(Prog) {
getExprBuilder().setIDToSAI(&IDToSAI);
}
@ -202,12 +201,6 @@ private:
/// The GPU program we generate code for.
gpu_prog *Prog;
/// The GPU Runtime implementation to use (OpenCL or CUDA).
GPURuntime Runtime;
/// The GPU Architecture to target.
GPUArch Arch;
/// Class to free isl_ids.
class IslIdDeleter {
public:
@ -759,17 +752,7 @@ void GPUNodeBuilder::createCallSynchronizeDevice() {
}
Value *GPUNodeBuilder::createCallInitContext() {
const char *Name;
switch (Runtime) {
case GPURuntime::CUDA:
Name = "polly_initContextCUDA";
break;
case GPURuntime::OpenCL:
Name = "polly_initContextCL";
break;
}
const char *Name = "polly_initContext";
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Function *F = M->getFunction(Name);
@ -1045,15 +1028,7 @@ void GPUNodeBuilder::createScopStmt(isl_ast_expr *Expr,
void GPUNodeBuilder::createKernelSync() {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Function *Sync;
switch (Arch) {
case GPUArch::NVPTX64:
Sync = Intrinsic::getDeclaration(M, Intrinsic::nvvm_barrier0);
break;
}
auto *Sync = Intrinsic::getDeclaration(M, Intrinsic::nvvm_barrier0);
Builder.CreateCall(Sync, {});
}
@ -1459,12 +1434,7 @@ GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel,
auto *FT = FunctionType::get(Builder.getVoidTy(), Args, false);
auto *FN = Function::Create(FT, Function::ExternalLinkage, Identifier,
GPUModule.get());
switch (Arch) {
case GPUArch::NVPTX64:
FN->setCallingConv(CallingConv::PTX_Kernel);
break;
}
FN->setCallingConv(CallingConv::PTX_Kernel);
auto Arg = FN->arg_begin();
for (long i = 0; i < Kernel->n_array; i++) {
@ -1525,19 +1495,12 @@ GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel,
}
void GPUNodeBuilder::insertKernelIntrinsics(ppcg_kernel *Kernel) {
Intrinsic::ID IntrinsicsBID[2];
Intrinsic::ID IntrinsicsTID[3];
Intrinsic::ID IntrinsicsBID[] = {Intrinsic::nvvm_read_ptx_sreg_ctaid_x,
Intrinsic::nvvm_read_ptx_sreg_ctaid_y};
switch (Arch) {
case GPUArch::NVPTX64:
IntrinsicsBID[0] = Intrinsic::nvvm_read_ptx_sreg_ctaid_x;
IntrinsicsBID[1] = Intrinsic::nvvm_read_ptx_sreg_ctaid_y;
IntrinsicsTID[0] = Intrinsic::nvvm_read_ptx_sreg_tid_x;
IntrinsicsTID[1] = Intrinsic::nvvm_read_ptx_sreg_tid_y;
IntrinsicsTID[2] = Intrinsic::nvvm_read_ptx_sreg_tid_z;
break;
}
Intrinsic::ID IntrinsicsTID[] = {Intrinsic::nvvm_read_ptx_sreg_tid_x,
Intrinsic::nvvm_read_ptx_sreg_tid_y,
Intrinsic::nvvm_read_ptx_sreg_tid_z};
auto addId = [this](__isl_take isl_id *Id, Intrinsic::ID Intr) mutable {
std::string Name = isl_id_get_name(Id);
@ -1686,18 +1649,11 @@ void GPUNodeBuilder::createKernelVariables(ppcg_kernel *Kernel, Function *FN) {
void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel,
SetVector<Value *> &SubtreeValues) {
std::string Identifier = "kernel_" + std::to_string(Kernel->id);
GPUModule.reset(new Module(Identifier, Builder.getContext()));
switch (Arch) {
case GPUArch::NVPTX64:
if (Runtime == GPURuntime::CUDA)
GPUModule->setTargetTriple(Triple::normalize("nvptx64-nvidia-cuda"));
else if (Runtime == GPURuntime::OpenCL)
GPUModule->setTargetTriple(Triple::normalize("nvptx64-nvidia-nvcl"));
GPUModule->setDataLayout(computeNVPTXDataLayout(true /* is64Bit */));
break;
}
GPUModule->setTargetTriple(Triple::normalize("nvptx64-nvidia-cuda"));
GPUModule->setDataLayout(computeNVPTXDataLayout(true /* is64Bit */));
Function *FN = createKernelFunctionDecl(Kernel, SubtreeValues);
@ -1718,21 +1674,7 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel,
}
std::string GPUNodeBuilder::createKernelASM() {
llvm::Triple GPUTriple;
switch (Arch) {
case GPUArch::NVPTX64:
switch (Runtime) {
case GPURuntime::CUDA:
GPUTriple = llvm::Triple(Triple::normalize("nvptx64-nvidia-cuda"));
break;
case GPURuntime::OpenCL:
GPUTriple = llvm::Triple(Triple::normalize("nvptx64-nvidia-nvcl"));
break;
}
break;
}
llvm::Triple GPUTriple(Triple::normalize("nvptx64-nvidia-cuda"));
std::string ErrMsg;
auto GPUTarget = TargetRegistry::lookupTarget(GPUTriple.getTriple(), ErrMsg);
@ -1743,17 +1685,9 @@ std::string GPUNodeBuilder::createKernelASM() {
TargetOptions Options;
Options.UnsafeFPMath = FastMath;
std::string subtarget;
switch (Arch) {
case GPUArch::NVPTX64:
subtarget = CudaVersion;
break;
}
std::unique_ptr<TargetMachine> TargetM(GPUTarget->createTargetMachine(
GPUTriple.getTriple(), subtarget, "", Options, Optional<Reloc::Model>()));
std::unique_ptr<TargetMachine> TargetM(
GPUTarget->createTargetMachine(GPUTriple.getTriple(), CudaVersion, "",
Options, Optional<Reloc::Model>()));
SmallString<0> ASMString;
raw_svector_ostream ASMStream(ASMString);
@ -1805,10 +1739,6 @@ class PPCGCodeGeneration : public ScopPass {
public:
static char ID;
GPURuntime Runtime = GPURuntime::CUDA;
GPUArch Architecture = GPUArch::NVPTX64;
/// The scop that is currently processed.
Scop *S;
@ -2592,7 +2522,7 @@ public:
executeScopConditionally(*S, Builder.getTrue(), *DT, *RI, *LI);
GPUNodeBuilder NodeBuilder(Builder, Annotator, *DL, *LI, *SE, *DT, *S,
StartBlock, Prog, Runtime, Architecture);
StartBlock, Prog);
// TODO: Handle LICM
auto SplitBlock = StartBlock->getSinglePredecessor();
@ -2680,12 +2610,7 @@ public:
char PPCGCodeGeneration::ID = 1;
Pass *polly::createPPCGCodeGenerationPass(GPUArch Arch, GPURuntime Runtime) {
PPCGCodeGeneration *generator = new PPCGCodeGeneration();
generator->Runtime = Runtime;
generator->Architecture = Arch;
return generator;
}
Pass *polly::createPPCGCodeGenerationPass() { return new PPCGCodeGeneration(); }
INITIALIZE_PASS_BEGIN(PPCGCodeGeneration, "polly-codegen-ppcg",
"Polly - Apply PPCG translation to SCOP", false, false)

View File

@ -23,7 +23,6 @@
#include "polly/Canonicalization.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/CodegenCleanup.h"
#include "polly/CodeGen/PPCGCodeGeneration.h"
#include "polly/DeLICM.h"
#include "polly/DependenceInfo.h"
#include "polly/FlattenSchedule.h"
@ -102,23 +101,6 @@ static cl::opt<TargetChoice>
),
cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory));
#ifdef GPU_CODEGEN
static cl::opt<GPURuntime> GPURuntimeChoice(
"polly-gpu-runtime", cl::desc("The GPU Runtime API to target"),
cl::values(clEnumValN(GPURuntime::CUDA, "libcudart",
"use the CUDA Runtime API"),
clEnumValN(GPURuntime::OpenCL, "libopencl",
"use the OpenCL Runtime API")),
cl::init(GPURuntime::CUDA), cl::ZeroOrMore, cl::cat(PollyCategory));
static cl::opt<GPUArch>
GPUArchChoice("polly-gpu-arch", cl::desc("The GPU Architecture to target"),
cl::values(clEnumValN(GPUArch::NVPTX64, "nvptx64",
"target NVIDIA 64-bit architecture")),
cl::init(GPUArch::NVPTX64), cl::ZeroOrMore,
cl::cat(PollyCategory));
#endif
VectorizerChoice polly::PollyVectorizerChoice;
static cl::opt<polly::VectorizerChoice, true> Vectorizer(
"polly-vectorizer", cl::desc("Select the vectorization strategy"),
@ -327,8 +309,7 @@ void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
if (Target == TARGET_GPU) {
#ifdef GPU_CODEGEN
PM.add(
polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
PM.add(polly::createPPCGCodeGenerationPass());
#endif
} else {
switch (CodeGeneration) {

View File

@ -35,7 +35,7 @@
; CHECK-NOT: polly_freeDeviceMemory
; CHECK-NOT: polly_allocateMemoryForDevice
; CHECK: %13 = call i8* @polly_initContextCUDA()
; CHECK: %13 = call i8* @polly_initContext()
; CHECK-NEXT: %14 = bitcast i32* %A to i8*
; CHECK-NEXT: %15 = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 0
; CHECK-NEXT: store i8* %14, i8** %polly_launch_0_param_0
@ -46,7 +46,7 @@
; CHECK-NEXT: store i8* %17, i8** %polly_launch_0_param_1
; CHECK-NEXT: %19 = bitcast i8** %polly_launch_0_param_1 to i8*
; CHECK-NEXT: store i8* %19, i8** %18
; CHECK-NEXT: %20 = call i8* @polly_getKernel(i8* getelementptr inbounds ([750 x i8], [750 x i8]* @kernel_0, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @kernel_0_name, i32 0, i32 0))
; CHECK-NEXT: %20 = call i8* @polly_getKernel(i8* getelementptr inbounds ([750 x i8], [750 x i8]* @kernel_0, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @kernel_0_name, i32 0, i32 0))
; CHECK-NEXT: call void @polly_launchKernel(i8* %20, i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
; CHECK-NEXT: call void @polly_freeKernel(i8* %20)
; CHECK-NEXT: call void @polly_synchronizeDevice()

View File

@ -29,7 +29,7 @@
; CODE-NEXT: if (arg >= 32 * b0 + t0 + 1048576 * c0 + 1)
; CODE-NEXT: Stmt_bb6(0, 32 * b0 + t0 + 1048576 * c0);
; IR: call i8* @polly_initContextCUDA()
; IR: call i8* @polly_initContext()
; IR-NEXT: sext i32 %arg to i64
; IR-NEXT: mul i64
; IR-NEXT: @polly_allocateMemoryForDevice

View File

@ -1,5 +1,5 @@
if (CUDALIB_FOUND OR OpenCL_FOUND)
if (CUDALIB_FOUND)
add_subdirectory(GPURuntime)
endif (CUDALIB_FOUND OR OpenCL_FOUND)
endif (CUDALIB_FOUND)
set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} PARENT_SCOPE)

File diff suppressed because it is too large Load Diff

View File

@ -76,27 +76,12 @@
*
*/
typedef enum PollyGPURuntimeT {
RUNTIME_NONE,
RUNTIME_CUDA,
RUNTIME_CL
} PollyGPURuntime;
typedef struct PollyGPUContextT PollyGPUContext;
typedef struct PollyGPUFunctionT PollyGPUFunction;
typedef struct PollyGPUDevicePtrT PollyGPUDevicePtr;
typedef struct OpenCLContextT OpenCLContext;
typedef struct OpenCLKernelT OpenCLKernel;
typedef struct OpenCLDevicePtrT OpenCLDevicePtr;
typedef struct CUDAContextT CUDAContext;
typedef struct CUDAKernelT CUDAKernel;
typedef struct CUDADevicePtrT CUDADevicePtr;
PollyGPUContext *polly_initContextCUDA();
PollyGPUContext *polly_initContextCL();
PollyGPUFunction *polly_getKernel(const char *BinaryBuffer,
PollyGPUContext *polly_initContext();
PollyGPUFunction *polly_getKernel(const char *PTXBuffer,
const char *KernelName);
void polly_freeKernel(PollyGPUFunction *Kernel);
void polly_copyFromHostToDevice(void *HostData, PollyGPUDevicePtr *DevData,