forked from OSchip/llvm-project
GPGPU: Generate an AST for the GPU-mapped schedule
For this we need to provide an explicit list of statements as they occur in the polly::Scop to ppcg. We also setup basic AST printing facilities to facilitate debugging. To allow code reuse some (minor) changes in ppcg are have been necessary. llvm-svn: 275436
This commit is contained in:
parent
60c6002570
commit
69b4675180
|
@ -26,8 +26,11 @@
|
|||
#include "isl/union_map.h"
|
||||
|
||||
extern "C" {
|
||||
#include "cuda.h"
|
||||
#include "gpu.h"
|
||||
#include "gpu_print.h"
|
||||
#include "ppcg.h"
|
||||
#include "schedule.h"
|
||||
}
|
||||
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
@ -41,6 +44,12 @@ static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule",
|
|||
cl::desc("Dump the computed GPU Schedule"),
|
||||
cl::Hidden, cl::init(false), cl::ZeroOrMore,
|
||||
cl::cat(PollyCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
DumpCode("polly-acc-dump-code",
|
||||
cl::desc("Dump C code describing the GPU mapping"), cl::Hidden,
|
||||
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
|
||||
|
||||
/// Create the ast expressions for a ScopStmt.
|
||||
///
|
||||
/// This function is a callback for to generate the ast expressions for each
|
||||
|
@ -256,6 +265,33 @@ public:
|
|||
return PPCGScop;
|
||||
}
|
||||
|
||||
/// Collect the list of GPU statements.
|
||||
///
|
||||
/// Each statement has an id, a pointer to the underlying data structure,
|
||||
/// as well as a list with all memory accesses.
|
||||
///
|
||||
/// TODO: Initialize the list of memory accesses.
|
||||
///
|
||||
/// @returns A linked-list of statements.
|
||||
gpu_stmt *getStatements() {
|
||||
gpu_stmt *Stmts = isl_calloc_array(S->getIslCtx(), struct gpu_stmt,
|
||||
std::distance(S->begin(), S->end()));
|
||||
|
||||
int i = 0;
|
||||
for (auto &Stmt : *S) {
|
||||
gpu_stmt *GPUStmt = &Stmts[i];
|
||||
|
||||
GPUStmt->id = Stmt.getDomainId();
|
||||
|
||||
// We use the pet stmt pointer to keep track of the Polly statements.
|
||||
GPUStmt->stmt = (pet_stmt *)&Stmt;
|
||||
GPUStmt->accesses = nullptr;
|
||||
i++;
|
||||
}
|
||||
|
||||
return Stmts;
|
||||
}
|
||||
|
||||
/// Create a default-initialized PPCG GPU program.
|
||||
///
|
||||
/// @returns A new gpu grogram description.
|
||||
|
@ -278,14 +314,90 @@ public:
|
|||
PPCGProg->to_inner = nullptr;
|
||||
PPCGProg->any_to_outer = nullptr;
|
||||
PPCGProg->array_order = nullptr;
|
||||
PPCGProg->n_stmts = 0;
|
||||
PPCGProg->stmts = nullptr;
|
||||
PPCGProg->n_stmts = std::distance(S->begin(), S->end());
|
||||
PPCGProg->stmts = getStatements();
|
||||
PPCGProg->n_array = 0;
|
||||
PPCGProg->array = nullptr;
|
||||
|
||||
return PPCGProg;
|
||||
}
|
||||
|
||||
struct PrintGPUUserData {
|
||||
struct cuda_info *CudaInfo;
|
||||
struct gpu_prog *PPCGProg;
|
||||
std::vector<ppcg_kernel *> Kernels;
|
||||
};
|
||||
|
||||
/// Print a user statement node in the host code.
|
||||
///
|
||||
/// We use ppcg's printing facilities to print the actual statement and
|
||||
/// additionally build up a list of all kernels that are encountered in the
|
||||
/// host ast.
|
||||
///
|
||||
/// @param P The printer to print to
|
||||
/// @param Options The printing options to use
|
||||
/// @param Node The node to print
|
||||
/// @param User A user pointer to carry additional data. This pointer is
|
||||
/// expected to be of type PrintGPUUserData.
|
||||
///
|
||||
/// @returns A printer to which the output has been printed.
|
||||
static __isl_give isl_printer *
|
||||
printHostUser(__isl_take isl_printer *P,
|
||||
__isl_take isl_ast_print_options *Options,
|
||||
__isl_take isl_ast_node *Node, void *User) {
|
||||
auto Data = (struct PrintGPUUserData *)User;
|
||||
auto Id = isl_ast_node_get_annotation(Node);
|
||||
|
||||
if (Id) {
|
||||
auto Kernel = (struct ppcg_kernel *)isl_id_get_user(Id);
|
||||
isl_id_free(Id);
|
||||
Data->Kernels.push_back(Kernel);
|
||||
}
|
||||
|
||||
return print_host_user(P, Options, Node, User);
|
||||
}
|
||||
|
||||
/// Print C code corresponding to the control flow in @p Kernel.
|
||||
///
|
||||
/// @param Kernel The kernel to print
|
||||
void printKernel(ppcg_kernel *Kernel) {
|
||||
auto *P = isl_printer_to_str(S->getIslCtx());
|
||||
P = isl_printer_set_output_format(P, ISL_FORMAT_C);
|
||||
auto *Options = isl_ast_print_options_alloc(S->getIslCtx());
|
||||
P = isl_ast_node_print(Kernel->tree, P, Options);
|
||||
char *String = isl_printer_get_str(P);
|
||||
printf("%s\n", String);
|
||||
free(String);
|
||||
isl_printer_free(P);
|
||||
}
|
||||
|
||||
/// Print C code corresponding to the GPU code described by @p Tree.
|
||||
///
|
||||
/// @param Tree An AST describing GPU code
|
||||
/// @param PPCGProg The PPCG program from which @Tree has been constructed.
|
||||
void printGPUTree(isl_ast_node *Tree, gpu_prog *PPCGProg) {
|
||||
auto *P = isl_printer_to_str(S->getIslCtx());
|
||||
P = isl_printer_set_output_format(P, ISL_FORMAT_C);
|
||||
|
||||
PrintGPUUserData Data;
|
||||
Data.PPCGProg = PPCGProg;
|
||||
|
||||
auto *Options = isl_ast_print_options_alloc(S->getIslCtx());
|
||||
Options =
|
||||
isl_ast_print_options_set_print_user(Options, printHostUser, &Data);
|
||||
P = isl_ast_node_print(Tree, P, Options);
|
||||
char *String = isl_printer_get_str(P);
|
||||
printf("# host\n");
|
||||
printf("%s\n", String);
|
||||
free(String);
|
||||
isl_printer_free(P);
|
||||
|
||||
for (auto Kernel : Data.Kernels) {
|
||||
printf("# kernel%d\n", Kernel->id);
|
||||
printKernel(Kernel);
|
||||
}
|
||||
}
|
||||
|
||||
// Generate a GPU program using PPCG.
|
||||
//
|
||||
// GPU mapping consists of multiple steps:
|
||||
|
@ -322,10 +434,12 @@ public:
|
|||
|
||||
int has_permutable = has_any_permutable_node(Schedule);
|
||||
|
||||
if (!has_permutable || has_permutable < 0)
|
||||
if (!has_permutable || has_permutable < 0) {
|
||||
Schedule = isl_schedule_free(Schedule);
|
||||
else
|
||||
} else {
|
||||
Schedule = map_to_device(PPCGGen, Schedule);
|
||||
PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule));
|
||||
}
|
||||
|
||||
if (DumpSchedule) {
|
||||
isl_printer *P = isl_printer_to_str(S->getIslCtx());
|
||||
|
@ -341,6 +455,15 @@ public:
|
|||
isl_printer_free(P);
|
||||
}
|
||||
|
||||
if (DumpCode) {
|
||||
printf("Code\n");
|
||||
printf("====\n");
|
||||
if (PPCGGen->tree)
|
||||
printGPUTree(PPCGGen->tree, PPCGProg);
|
||||
else
|
||||
printf("No code generated\n");
|
||||
}
|
||||
|
||||
isl_schedule_free(Schedule);
|
||||
|
||||
return PPCGGen;
|
||||
|
|
|
@ -153,20 +153,20 @@ static __isl_give isl_printer *copy_array_from_device(
|
|||
return p;
|
||||
}
|
||||
|
||||
static void print_reverse_list(FILE *out, int len, int *list)
|
||||
static isl_printer *print_reverse_list(isl_printer *p, int len, int *list)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (len == 0)
|
||||
return;
|
||||
return p;
|
||||
|
||||
fprintf(out, "(");
|
||||
p = isl_printer_print_str(p, "(");
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (i)
|
||||
fprintf(out, ", ");
|
||||
fprintf(out, "%d", list[len - 1 - i]);
|
||||
p = isl_printer_print_str(p, ", ");
|
||||
p = isl_printer_print_int(p, list[len - 1 - i]);
|
||||
}
|
||||
fprintf(out, ")");
|
||||
return isl_printer_print_str(p, ")");
|
||||
}
|
||||
|
||||
/* Print the effective grid size as a list of the sizes in each
|
||||
|
@ -534,7 +534,7 @@ struct print_host_user_data {
|
|||
* In case of a kernel launch, print a block of statements that
|
||||
* defines the grid and the block and then launches the kernel.
|
||||
*/
|
||||
static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
|
||||
__isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
|
||||
__isl_take isl_ast_print_options *print_options,
|
||||
__isl_keep isl_ast_node *node, void *user)
|
||||
{
|
||||
|
@ -569,8 +569,7 @@ static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
|
|||
p = isl_printer_print_str(p, "dim3 k");
|
||||
p = isl_printer_print_int(p, kernel->id);
|
||||
p = isl_printer_print_str(p, "_dimBlock");
|
||||
print_reverse_list(isl_printer_get_file(p),
|
||||
kernel->n_block, kernel->block_dim);
|
||||
p = print_reverse_list(p, kernel->n_block, kernel->block_dim);
|
||||
p = isl_printer_print_str(p, ";");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
|
@ -600,7 +599,9 @@ static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
|
|||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
#if 0
|
||||
print_kernel(data->prog, kernel, data->cuda);
|
||||
#endif
|
||||
|
||||
return p;
|
||||
}
|
||||
|
|
|
@ -6,5 +6,8 @@
|
|||
|
||||
int generate_cuda(isl_ctx *ctx, struct ppcg_options *options,
|
||||
const char *input);
|
||||
__isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
|
||||
__isl_take isl_ast_print_options *print_options,
|
||||
__isl_keep isl_ast_node *node, void *user);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2297,7 +2297,7 @@ static isl_bool update_depth(__isl_keep isl_schedule_node *node, void *user)
|
|||
* The ASTs for the device code are embedded in ppcg_kernel objects
|
||||
* attached to the leaf nodes that call "kernel".
|
||||
*/
|
||||
static __isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
|
||||
__isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
|
||||
__isl_take isl_schedule *schedule)
|
||||
{
|
||||
struct ppcg_at_domain_data data;
|
||||
|
|
|
@ -367,4 +367,6 @@ __isl_give isl_schedule *get_schedule(struct gpu_gen *gen);
|
|||
int has_any_permutable_node(__isl_keep isl_schedule *schedule);
|
||||
__isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
|
||||
__isl_take isl_schedule *schedule);
|
||||
__isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
|
||||
__isl_take isl_schedule *schedule);
|
||||
#endif
|
||||
|
|
|
@ -3,6 +3,10 @@
|
|||
; RUN: -disable-output < %s | \
|
||||
; RUN: FileCheck -check-prefix=SCHED %s
|
||||
|
||||
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
|
||||
; RUN: -disable-output < %s | \
|
||||
; RUN: FileCheck -check-prefix=CODE %s
|
||||
|
||||
; REQUIRES: pollyacc
|
||||
|
||||
; CHECK: Stmt_bb5
|
||||
|
@ -44,6 +48,20 @@
|
|||
; SCHED: coincident: [ 1, 1 ]
|
||||
; SCHED: - filter: "{ }"
|
||||
|
||||
; CODE: Code
|
||||
; CODE: ====
|
||||
; CODE: # host
|
||||
; CODE: {
|
||||
; CODE: dim3 k0_dimBlock(16, 32);
|
||||
; CODE: dim3 k0_dimGrid(32, 32);
|
||||
; CODE: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE: }
|
||||
|
||||
; CODE: # kernel0
|
||||
; CODE: for (int c3 = 0; c3 <= 1; c3 += 1)
|
||||
; CODE: Stmt_bb5(32 * b0 + t0, 32 * b1 + t1 + 16 * c3);
|
||||
|
||||
|
||||
; void double_parallel_loop(float A[][1024]) {
|
||||
; for (long i = 0; i < 1024; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
|
|
Loading…
Reference in New Issue