GPGPU: Map initial schedule to GPU schedule

This change now applies ppcg's GPU mapping on our initial schedule. For this to work, we need to also initialize the set of all names (isl_ids) used in the scop as well as the program context. llvm-svn: 275396
2016-07-14 10:51:52 +00:00 · 2016-07-14 10:51:52 +00:00 · aef5196f75
parent 681bd5688f
commit aef5196f75
4 changed files with 73 additions and 7 deletions
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@ -155,6 +155,38 @@ public:
    return getTaggedAccesses(MemoryAccess::MUST_WRITE);
  }

+  /// Collect parameter and array names as isl_ids.
+  ///
+  /// To reason about the different parameters and arrays used, ppcg requires
+  /// a list of all isl_ids in use. As PPCG traditionally performs
+  /// source-to-source compilation, each of these isl_ids is mapped to the
+  /// expression that represents it. As we do not have a corresponding
+  /// expression in Polly, we just map each id to a 'zero' expression to match
+  /// the data format that ppcg expects.
+  ///
+  /// @returns A map from the collected ids to 'zero' ast expressions.
+  __isl_give isl_id_to_ast_expr *getNames() {
+    auto *Names = isl_id_to_ast_expr_alloc(
+        S->getIslCtx(), S->getNumParams() + std::distance(S->array_begin(), S->array_end()));
+    auto *Zero = isl_ast_expr_from_val(isl_val_zero(S->getIslCtx()));
+    auto *Space = S->getParamSpace();
+
+    for (int I = 0, E = S->getNumParams(); I < E; ++I) {
+      isl_id *Id = isl_space_get_dim_id(Space, isl_dim_param, I);
+      Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero));
+    }
+
+    for (auto &Array : S->arrays()) {
+      auto Id = Array.second->getBasePtrId();
+      Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero));
+    }
+
+    isl_space_free(Space);
+    isl_ast_expr_free(Zero);
+
+    return Names;
+  }
+
  /// Create a new PPCG scop from the current scop.
  ///
  /// The PPCG scop is initialized with data from the current polly::Scop. From
@ -194,7 +226,7 @@ public:
    PPCGScop->tagged_dep_order = nullptr;

    PPCGScop->schedule = S->getScheduleTree();
-    PPCGScop->names = nullptr;
+    PPCGScop->names = getNames();

    PPCGScop->pet = nullptr;

@ -216,7 +248,7 @@ public:

    PPCGProg->ctx = S->getIslCtx();
    PPCGProg->scop = PPCGScop;
-    PPCGProg->context = nullptr;
+    PPCGProg->context = isl_set_copy(PPCGScop->context);
    PPCGProg->read = nullptr;
    PPCGProg->may_write = nullptr;
    PPCGProg->must_write = nullptr;
@ -267,6 +299,13 @@ public:

    isl_schedule *Schedule = get_schedule(PPCGGen);

+    int has_permutable = has_any_permutable_node(Schedule);
+
+    if (!has_permutable || has_permutable < 0)
+      Schedule = isl_schedule_free(Schedule);
+    else
+      Schedule = map_to_device(PPCGGen, Schedule);
+
    if (DumpSchedule) {
      isl_printer *P = isl_printer_to_str(S->getIslCtx());
      P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
--- a/polly/lib/External/ppcg/gpu.c
+++ b/polly/lib/External/ppcg/gpu.c
@ -2375,7 +2375,7 @@ static isl_bool set_permutable(__isl_keep isl_schedule_node *node, void *user)
 /* Does "schedule" contain any permutable band with at least one coincident
 * member?
 */
-static int has_any_permutable_node(__isl_keep isl_schedule *schedule)
+int has_any_permutable_node(__isl_keep isl_schedule *schedule)
 {
 	int any_permutable = 0;

@ -4938,7 +4938,7 @@ static __isl_give isl_schedule_node *add_to_from_device(
 * are separated from the other children and are not mapped to
 * the device.
 */
-static __isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
+__isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
 	__isl_take isl_schedule *schedule)
 {
 	isl_schedule_node *node;
--- a/polly/lib/External/ppcg/gpu.h
+++ b/polly/lib/External/ppcg/gpu.h
@ -353,4 +353,7 @@ int generate_gpu(isl_ctx *ctx, const char *input, FILE *out,
 		struct gpu_types *types, void *user), void *user);

 __isl_give isl_schedule *get_schedule(struct gpu_gen *gen);
+int has_any_permutable_node(__isl_keep isl_schedule *schedule);
+__isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
+                                       __isl_take isl_schedule *schedule);
 #endif
--- a/polly/test/GPGPU/double-parallel-loop.ll
+++ b/polly/test/GPGPU/double-parallel-loop.ll
@ -17,9 +17,33 @@

 ; SCHED: domain: "{ Stmt_bb5[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }"
 ; SCHED: child:
-; SCHED:   schedule: "[{ Stmt_bb5[i0, i1] -> [(i0)] }, { Stmt_bb5[i0, i1] -> [(i1)] }]"
-; SCHED:   permutable: 1
-; SCHED:   coincident: [ 1, 1 ]
+; SCHED:   context: "{ [] }"
+; SCHED:   child:
+; SCHED:     extension: "{  }"
+; SCHED:     child:
+; SCHED:       sequence:
+; SCHED:       - filter: "{  }"
+; SCHED:       - filter: "{ Stmt_bb5[i0, i1] }"
+; SCHED:         child:
+; SCHED:           guard: "{ [] }"
+; SCHED:           child:
+; SCHED:             mark: "kernel"
+; SCHED:             child:
+; SCHED:               context: "[b0, b1, t0, t1] -> { [] : 0 <= b0 <= 255 and 0 <= b1 <= 255 and 0 <= t0 <= 3 and 0 <= t1 <= 3 }"
+; SCHED:               child:
+; SCHED:                 filter: "[b0, b1] -> { Stmt_bb5[i0, i1] : -3 - 4b0 + i0 <= 1024*floor((i0)/1024) <= -4b0 + i0 and -3 - 4b1 + i1 <= 1024*floor((i1)/1024) <= -4b1 + i1 }"
+; SCHED:                 child:
+; SCHED:                   schedule: "[{ Stmt_bb5[i0, i1] -> [(floor((i0)/1024))] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/1024))] }]"
+; SCHED:                   permutable: 1
+; SCHED:                   coincident: [ 1, 1 ]
+; SCHED:                   child:
+; SCHED:                     filter: "[t0, t1] -> { Stmt_bb5[i0, i1] : 4*floor((-t0 + i0)/4) = -t0 + i0 and 4*floor((-t1 + i1)/4) = -t1 + i1 and 0 <= t0 <= 3 and 0 <= t1 <= 3 }"
+; SCHED:                     child:
+; SCHED:                       schedule: "[{ Stmt_bb5[i0, i1] -> [(0)] }, { Stmt_bb5[i0, i1] -> [(0)] }]"
+; SCHED:                       permutable: 1
+; SCHED:                       coincident: [ 1, 1 ]
+; SCHED:       - filter: "{  }"
+

 ;    void double_parallel_loop(float A[][1024]) {
 ;      for (long i = 0; i < 1024; i++)