diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index cd84d3c2090c..b61fffb91395 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -155,6 +155,38 @@ public: return getTaggedAccesses(MemoryAccess::MUST_WRITE); } + /// Collect parameter and array names as isl_ids. + /// + /// To reason about the different parameters and arrays used, ppcg requires + /// a list of all isl_ids in use. As PPCG traditionally performs + /// source-to-source compilation, each of these isl_ids is mapped to the + /// expression that represents it. As we do not have a corresponding + /// expression in Polly, we just map each id to a 'zero' expression to match + /// the data format that ppcg expects. + /// + /// @returns A map from the collected ids to 'zero' ast expressions. + __isl_give isl_id_to_ast_expr *getNames() { + auto *Names = isl_id_to_ast_expr_alloc( + S->getIslCtx(), S->getNumParams() + std::distance(S->array_begin(), S->array_end())); + auto *Zero = isl_ast_expr_from_val(isl_val_zero(S->getIslCtx())); + auto *Space = S->getParamSpace(); + + for (int I = 0, E = S->getNumParams(); I < E; ++I) { + isl_id *Id = isl_space_get_dim_id(Space, isl_dim_param, I); + Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); + } + + for (auto &Array : S->arrays()) { + auto Id = Array.second->getBasePtrId(); + Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); + } + + isl_space_free(Space); + isl_ast_expr_free(Zero); + + return Names; + } + /// Create a new PPCG scop from the current scop. /// /// The PPCG scop is initialized with data from the current polly::Scop. 
From @@ -194,7 +226,7 @@ public: PPCGScop->tagged_dep_order = nullptr; PPCGScop->schedule = S->getScheduleTree(); - PPCGScop->names = nullptr; + PPCGScop->names = getNames(); PPCGScop->pet = nullptr; @@ -216,7 +248,7 @@ public: PPCGProg->ctx = S->getIslCtx(); PPCGProg->scop = PPCGScop; - PPCGProg->context = nullptr; + PPCGProg->context = isl_set_copy(PPCGScop->context); PPCGProg->read = nullptr; PPCGProg->may_write = nullptr; PPCGProg->must_write = nullptr; @@ -267,6 +299,13 @@ public: isl_schedule *Schedule = get_schedule(PPCGGen); + int has_permutable = has_any_permutable_node(Schedule); + + if (!has_permutable || has_permutable < 0) + Schedule = isl_schedule_free(Schedule); + else + Schedule = map_to_device(PPCGGen, Schedule); + if (DumpSchedule) { isl_printer *P = isl_printer_to_str(S->getIslCtx()); P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK); diff --git a/polly/lib/External/ppcg/gpu.c b/polly/lib/External/ppcg/gpu.c index 05f277cf3c65..3eecf45656c3 100644 --- a/polly/lib/External/ppcg/gpu.c +++ b/polly/lib/External/ppcg/gpu.c @@ -2375,7 +2375,7 @@ static isl_bool set_permutable(__isl_keep isl_schedule_node *node, void *user) /* Does "schedule" contain any permutable band with at least one coincident * member? */ -static int has_any_permutable_node(__isl_keep isl_schedule *schedule) +int has_any_permutable_node(__isl_keep isl_schedule *schedule) { int any_permutable = 0; @@ -4938,7 +4938,7 @@ static __isl_give isl_schedule_node *add_to_from_device( * are separated from the other children and are not mapped to * the device. 
*/ -static __isl_give isl_schedule *map_to_device(struct gpu_gen *gen, +__isl_give isl_schedule *map_to_device(struct gpu_gen *gen, __isl_take isl_schedule *schedule) { isl_schedule_node *node; diff --git a/polly/lib/External/ppcg/gpu.h b/polly/lib/External/ppcg/gpu.h index d06ddb28f3e7..c5009c0b2c2b 100644 --- a/polly/lib/External/ppcg/gpu.h +++ b/polly/lib/External/ppcg/gpu.h @@ -353,4 +353,7 @@ int generate_gpu(isl_ctx *ctx, const char *input, FILE *out, struct gpu_types *types, void *user), void *user); __isl_give isl_schedule *get_schedule(struct gpu_gen *gen); +int has_any_permutable_node(__isl_keep isl_schedule *schedule); +__isl_give isl_schedule *map_to_device(struct gpu_gen *gen, + __isl_take isl_schedule *schedule); #endif diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll index 4c1bc9551b90..7ae5010abd1d 100644 --- a/polly/test/GPGPU/double-parallel-loop.ll +++ b/polly/test/GPGPU/double-parallel-loop.ll @@ -17,9 +17,33 @@ ; SCHED: domain: "{ Stmt_bb5[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }" ; SCHED: child: -; SCHED: schedule: "[{ Stmt_bb5[i0, i1] -> [(i0)] }, { Stmt_bb5[i0, i1] -> [(i1)] }]" -; SCHED: permutable: 1 -; SCHED: coincident: [ 1, 1 ] +; SCHED: context: "{ [] }" +; SCHED: child: +; SCHED: extension: "{ }" +; SCHED: child: +; SCHED: sequence: +; SCHED: - filter: "{ }" +; SCHED: - filter: "{ Stmt_bb5[i0, i1] }" +; SCHED: child: +; SCHED: guard: "{ [] }" +; SCHED: child: +; SCHED: mark: "kernel" +; SCHED: child: +; SCHED: context: "[b0, b1, t0, t1] -> { [] : 0 <= b0 <= 255 and 0 <= b1 <= 255 and 0 <= t0 <= 3 and 0 <= t1 <= 3 }" +; SCHED: child: +; SCHED: filter: "[b0, b1] -> { Stmt_bb5[i0, i1] : -3 - 4b0 + i0 <= 1024*floor((i0)/1024) <= -4b0 + i0 and -3 - 4b1 + i1 <= 1024*floor((i1)/1024) <= -4b1 + i1 }" +; SCHED: child: +; SCHED: schedule: "[{ Stmt_bb5[i0, i1] -> [(floor((i0)/1024))] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/1024))] }]" +; SCHED: permutable: 1 +; SCHED: coincident: [ 1, 1 ] +; 
SCHED: child: +; SCHED: filter: "[t0, t1] -> { Stmt_bb5[i0, i1] : 4*floor((-t0 + i0)/4) = -t0 + i0 and 4*floor((-t1 + i1)/4) = -t1 + i1 and 0 <= t0 <= 3 and 0 <= t1 <= 3 }" +; SCHED: child: +; SCHED: schedule: "[{ Stmt_bb5[i0, i1] -> [(0)] }, { Stmt_bb5[i0, i1] -> [(0)] }]" +; SCHED: permutable: 1 +; SCHED: coincident: [ 1, 1 ] +; SCHED: - filter: "{ }" + ; void double_parallel_loop(float A[][1024]) { ; for (long i = 0; i < 1024; i++)