GPGPU: initialize GPU context and simplify the corresponding GPURuntime interface.

There is no need to expose the selected device at the moment. We also pass back
pointers as return values, as this simplifies the interface.

llvm-svn: 276623
This commit is contained in:
Tobias Grosser 2016-07-25 09:16:01 +00:00
parent 8ed5e5999f
commit fa7b080218
4 changed files with 142 additions and 29 deletions

View File

@ -135,6 +135,12 @@ public:
getExprBuilder().setIDToSAI(&IDToSAI);
}
/// Create after-run-time-check initialization code.
void initializeAfterRTH();
/// Finalize the generated scop.
virtual void finalize();
private:
/// A vector of array base pointers for which a new ScopArrayInfo was created.
///
@ -142,6 +148,9 @@ private:
/// more.
std::vector<Value *> LocalArrays;
/// The current GPU context.
Value *GPUContext;
/// A module containing GPU code.
///
/// This pointer is only set in case we are currently generating GPU code.
@ -256,8 +265,113 @@ private:
/// Free the LLVM-IR module corresponding to the kernel and -- if requested --
/// dump its IR to stderr.
void finalizeKernelFunction();
/// Emit one device-memory allocation call for every array of the GPU
/// program.
///
/// The allocation size is computed from the array's size field and, for
/// non-scalar arrays, the product of its bounds. Each resulting value is
/// named "p_devptr_" followed by the array name.
void allocateDeviceArrays();
/// Create a call to initialize the GPU context.
///
/// @returns A pointer to the newly initialized context.
Value *createCallInitContext();
/// Create a call to free the GPU context.
///
/// @param Context A pointer to an initialized GPU context.
void createCallFreeContext(Value *Context);
/// Create a call to polly_allocateMemoryForDevice.
///
/// The runtime function is declared in the current module (as
/// i8* (i64), external linkage) if it is not available yet.
///
/// @param Size The value passed as the i64 argument of the call.
///
/// @returns The value produced by the emitted call.
Value *createCallAllocateMemoryForDevice(Value *Size);
};
void GPUNodeBuilder::initializeAfterRTH() {
GPUContext = createCallInitContext();
allocateDeviceArrays();
}
void GPUNodeBuilder::finalize() {
createCallFreeContext(GPUContext);
IslNodeBuilder::finalize();
}
void GPUNodeBuilder::allocateDeviceArrays() {
isl_ast_build *Build = isl_ast_build_from_context(S.getContext());
for (int i = 0; i < Prog->n_array; ++i) {
gpu_array_info *Array = &Prog->array[i];
std::string DevPtrName("p_devptr_");
DevPtrName.append(Array->name);
Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size);
if (!gpu_array_is_scalar(Array)) {
auto OffsetDimZero = isl_pw_aff_copy(Array->bound[0]);
isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero);
for (unsigned int i = 1; i < Array->n_index; i++) {
isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i]);
isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, Bound_I);
Res = isl_ast_expr_mul(Res, Expr);
}
Value *NumElements = ExprBuilder.create(Res);
ArraySize = Builder.CreateMul(ArraySize, NumElements);
}
Value *DevPtr = createCallAllocateMemoryForDevice(ArraySize);
DevPtr->setName(DevPtrName);
}
isl_ast_build_free(Build);
}
/// Create a call to polly_allocateMemoryForDevice.
///
/// The function is looked up in the module that contains the current insert
/// point. If it is missing, it is declared there with external linkage and
/// the type i8* (i64).
///
/// @param Size The value passed as the single argument of the call.
///
/// @returns The value produced by the emitted call.
Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) {
  const char *const RuntimeFnName = "polly_allocateMemoryForDevice";

  Function *HostFn = Builder.GetInsertBlock()->getParent();
  Module *Mod = HostFn->getParent();

  Function *Callee = Mod->getFunction(RuntimeFnName);
  if (!Callee) {
    // Not declared in this module yet: add the external declaration.
    Type *ParamTys[] = {Builder.getInt64Ty()};
    FunctionType *CalleeTy = FunctionType::get(Builder.getInt8PtrTy(),
                                               ParamTys, /*isVarArg=*/false);
    Callee = Function::Create(CalleeTy, Function::ExternalLinkage,
                              RuntimeFnName, Mod);
  }

  return Builder.CreateCall(Callee, {Size});
}
/// Create a call to polly_initContext.
///
/// The function is looked up in the module that contains the current insert
/// point. If it is missing, it is declared there with external linkage and
/// the type i8* ().
///
/// @returns The value produced by the emitted call, i.e. the pointer handed
///          back by the runtime function.
Value *GPUNodeBuilder::createCallInitContext() {
  const char *const RuntimeFnName = "polly_initContext";

  Function *HostFn = Builder.GetInsertBlock()->getParent();
  Module *Mod = HostFn->getParent();

  Function *Callee = Mod->getFunction(RuntimeFnName);
  if (!Callee) {
    // Not declared in this module yet: add the external declaration. The
    // runtime function takes no parameters.
    FunctionType *CalleeTy =
        FunctionType::get(Builder.getInt8PtrTy(), /*isVarArg=*/false);
    Callee = Function::Create(CalleeTy, Function::ExternalLinkage,
                              RuntimeFnName, Mod);
  }

  return Builder.CreateCall(Callee, {});
}
/// Create a call to polly_freeContext.
///
/// The function is looked up in the module that contains the current insert
/// point. If it is missing, it is declared there with external linkage and
/// the type void (i8*).
///
/// @param Context The value passed as the single argument of the call.
void GPUNodeBuilder::createCallFreeContext(Value *Context) {
  const char *const RuntimeFnName = "polly_freeContext";

  Function *HostFn = Builder.GetInsertBlock()->getParent();
  Module *Mod = HostFn->getParent();

  Function *Callee = Mod->getFunction(RuntimeFnName);
  if (!Callee) {
    // Not declared in this module yet: add the external declaration.
    Type *ParamTys[] = {Builder.getInt8PtrTy()};
    FunctionType *CalleeTy =
        FunctionType::get(Builder.getVoidTy(), ParamTys, /*isVarArg=*/false);
    Callee = Function::Create(CalleeTy, Function::ExternalLinkage,
                              RuntimeFnName, Mod);
  }

  Builder.CreateCall(Callee, {Context});
}
/// Check if one string is a prefix of another.
///
/// @param String The string in which to look for the prefix.
@ -1325,6 +1439,8 @@ public:
Builder.SetInsertPoint(SplitBlock->getTerminator());
NodeBuilder.addParameters(S->getContext());
Builder.SetInsertPoint(&*StartBlock->begin());
NodeBuilder.initializeAfterRTH();
NodeBuilder.create(Root);
NodeBuilder.finalize();
}

View File

@ -92,6 +92,9 @@
; IR-NEXT: br i1 true, label %polly.start, label %bb2
; IR: polly.start:
; IR-NEXT: [[GPUContext:%.*]] = call i8* @polly_initContext()
; IR-NEXT: %p_devptr_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304)
; IR-NEXT: call void @polly_freeContext(i8* [[GPUContext]])
; IR-NEXT: br label %polly.exiting
; IR: polly.exiting:

View File

@ -44,10 +44,6 @@ struct PollyGPUFunctionT {
CUfunction Cuda;
};
struct PollyGPUDeviceT {
CUdevice Cuda;
};
struct PollyGPUDevicePtrT {
CUdeviceptr Cuda;
};
@ -219,10 +215,12 @@ static int initialDeviceAPIs() {
return 1;
}
void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device) {
PollyGPUContext *polly_initContext() {
DebugMode = getenv("POLLY_DEBUG") != 0;
dump_function();
PollyGPUContext *Context;
CUdevice Device;
int Major = 0, Minor = 0, DeviceID = 0;
char DeviceName[256];
@ -246,26 +244,22 @@ void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device) {
exit(-1);
}
/* We select the 1st device as default. */
*Device = malloc(sizeof(PollyGPUDevice));
if (*Device == 0) {
fprintf(stdout, "Allocate memory for Polly GPU device failed.\n");
exit(-1);
}
CuDeviceGetFcnPtr(&((*Device)->Cuda), 0);
CuDeviceGetFcnPtr(&Device, 0);
/* Get compute capabilities and the device name. */
CuDeviceComputeCapabilityFcnPtr(&Major, &Minor, (*Device)->Cuda);
CuDeviceGetNameFcnPtr(DeviceName, 256, (*Device)->Cuda);
CuDeviceComputeCapabilityFcnPtr(&Major, &Minor, Device);
CuDeviceGetNameFcnPtr(DeviceName, 256, Device);
debug_print("> Running on GPU device %d : %s.\n", DeviceID, DeviceName);
/* Create context on the device. */
*Context = malloc(sizeof(PollyGPUContext));
if (*Context == 0) {
Context = (PollyGPUContext *)malloc(sizeof(PollyGPUContext));
if (Context == 0) {
fprintf(stdout, "Allocate memory for Polly GPU context failed.\n");
exit(-1);
}
CuCtxCreateFcnPtr(&((*Context)->Cuda), 0, (*Device)->Cuda);
CuCtxCreateFcnPtr(&(Context->Cuda), 0, Device);
return Context;
}
void polly_getPTXModule(void *PTXBuffer, PollyGPUModule **Module) {
@ -347,7 +341,6 @@ void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth,
void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData,
PollyGPUModule *Module,
PollyGPUContext *Context,
PollyGPUFunction *Kernel) {
dump_function();
@ -365,16 +358,18 @@ void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData,
CuModuleUnloadFcnPtr(Module->Cuda);
free(Module);
}
if (Kernel) {
free(Kernel);
}
}
/* Release a GPU context and close the CUDA library handles.
 *
 * If Context is non-NULL, its CUDA context (when set) is destroyed and the
 * PollyGPUContext wrapper itself is freed. The wrapper is freed even when no
 * CUDA context is attached, so it is not leaked. Afterwards the handles
 * HandleCuda and HandleCudaRT are closed.
 *
 * Kernel objects are not touched here; polly_cleanupGPGPUResources() is the
 * function that receives and frees the kernel.
 */
void polly_freeContext(PollyGPUContext *Context) {
  if (Context) {
    if (Context->Cuda)
      CuCtxDestroyFcnPtr(Context->Cuda);
    free(Context);
  }

  dlclose(HandleCuda);
  dlclose(HandleCudaRT);
}

View File

@ -44,10 +44,9 @@
* const char *Entry = "_Z8myKernelPi";
*
* int main() {
* PollyGPUContext *Context;
* PollyGPUModule *Module;
* PollyGPUFunction *Kernel;
* PollyGPUDevice *Device;
* PollyGPUContext *Context;
* PollyGPUDevicePtr *PtrDevData;
* int *HostData;
* int MemSize;
@ -57,13 +56,14 @@
* int GridHeight = 8;
*
* MemSize = 256*64*sizeof(int);
* polly_initDevice(&Context, &Device);
* Context = polly_initContext();
* polly_getPTXModule(KernelString, &Module);
* polly_getPTXKernelEntry(Entry, Module, &Kernel);
* polly_setKernelParameters(Kernel, BlockWidth, BlockHeight, DevData);
* polly_launchKernel(Kernel, GridWidth, GridHeight);
* polly_copyFromDeviceToHost(HostData, DevData, MemSize);
* polly_cleanupGPGPUResources(HostData, DevData, Module, Context, Kernel);
* polly_cleanupGPGPUResources(HostData, DevData, Module, Kernel);
* polly_freeContext(Context);
* }
*
*/
@ -71,10 +71,9 @@
typedef struct PollyGPUContextT PollyGPUContext;
typedef struct PollyGPUModuleT PollyGPUModule;
typedef struct PollyGPUFunctionT PollyGPUFunction;
typedef struct PollyGPUDeviceT PollyGPUDevice;
typedef struct PollyGPUDevicePtrT PollyGPUDevicePtr;
void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device);
PollyGPUContext *polly_initContext();
void polly_getPTXModule(void *PTXBuffer, PollyGPUModule **Module);
void polly_getPTXKernelEntry(const char *KernelName, PollyGPUModule *Module,
PollyGPUFunction **Kernel);
@ -88,6 +87,6 @@ void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth,
int GridHeight);
void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData,
PollyGPUModule *Module,
PollyGPUContext *Context,
PollyGPUFunction *Kernel);
/* Free a context obtained from polly_initContext(). The name matches the
 * definition in the runtime and the usage example at the top of this header.
 */
void polly_freeContext(PollyGPUContext *Context);
#endif /* GPUJIT_H_ */