GPGPU: collect array references

Initialize the list of references to a GPU array to ensure that the arrays that
need to be passed to kernel calls are computed correctly.  Furthermore, the very
same information is also necessary to compute synchronization correctly. As the
functionality to compute these references is already available, what is left for
us to do is only to connect the necessary functionality to compute array
reference information.

llvm-svn: 275798
This commit is contained in:
Tobias Grosser 2016-07-18 15:44:32 +00:00
parent 1fb9b64dc0
commit b9fc860a57
8 changed files with 32 additions and 27 deletions

View File

@ -450,6 +450,8 @@ public:
setArrayBounds(PPCGArray, Array);
i++;
collect_references(PPCGProg, &PPCGArray);
}
}

View File

@ -55,7 +55,7 @@ static const char *get_outer_array_name(__isl_keep isl_map *access)
/* Collect all references to the given array and store pointers to them
* in array->refs.
*/
static void collect_references(struct gpu_prog *prog,
void collect_references(struct gpu_prog *prog,
struct gpu_array_info *array)
{
int i;

View File

@ -371,4 +371,5 @@ __isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
__isl_take isl_schedule *schedule);
__isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog);
void collect_references(struct gpu_prog *prog, struct gpu_array_info *array);
#endif

View File

@ -69,7 +69,7 @@
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(16, 32);
; CODE-NEXT: dim3 k0_dimGrid(32, 32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }

View File

@ -18,7 +18,7 @@
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(4);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (c0);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, c0);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }

View File

@ -20,7 +20,7 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(16);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (p_0, p_1);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_Q, p_0, p_1);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -28,14 +28,14 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0
; CODE-NEXT: {
; CODE-NEXT: dim3 k1_dimBlock(32);
; CODE-NEXT: dim3 k1_dimGrid(p_1 <= -1048034 ? 32768 : -p_1 + floord(31 * p_1 + 30, 32) + 16);
; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (p_0, p_1);
; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
; CODE: {
; CODE-NEXT: dim3 k2_dimBlock(16, 32);
; CODE-NEXT: dim3 k2_dimGrid(16, p_1 <= -7650 ? 256 : -p_1 + floord(31 * p_1 + 30, 32) + 16);
; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (p_0, p_1);
; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -53,11 +53,13 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0
; CODE: # kernel1
; CODE-NEXT: for (int c0 = 0; c0 <= (-p_1 - 32 * b0 + 510) / 1048576; c0 += 1)
; CODE-NEXT: if (p_1 + 32 * b0 + t0 + 1048576 * c0 <= 510) {
; CODE-NEXT: Stmt_for_body35(32 * b0 + t0 + 1048576 * c0);
; CODE-NEXT: for (int c1 = 0; c1 <= 15; c1 += 1)
; CODE-NEXT: for (int c1 = 0; c1 <= 15; c1 += 1) {
; CODE-NEXT: if (p_1 + 32 * b0 + t0 + 1048576 * c0 <= 510 && c1 == 0)
; CODE-NEXT: Stmt_for_body35(32 * b0 + t0 + 1048576 * c0);
; CODE-NEXT: if (p_1 + 32 * b0 + t0 + 1048576 * c0 <= 510)
; CODE-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
; CODE-NEXT: Stmt_for_body42(32 * b0 + t0 + 1048576 * c0, 32 * c1 + c3);
; CODE-NEXT: sync0();
; CODE-NEXT: }
; CODE: # kernel2

View File

@ -13,7 +13,7 @@
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_b);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -63,7 +63,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_b);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -113,7 +113,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_b);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -163,7 +163,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_b);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -213,7 +213,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_b);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -263,7 +263,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_b);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -312,7 +312,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -361,7 +361,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -410,7 +410,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -459,7 +459,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -508,7 +508,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -557,7 +557,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -606,7 +606,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -655,7 +655,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -704,7 +704,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@ -753,7 +753,7 @@ bb7: ; preds = %bb1
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }

View File

@ -40,14 +40,14 @@ target triple = "x86_64-unknown-linux-gnu"
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(16, 32);
; CODE-NEXT: dim3 k0_dimGrid(128, 128);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_tmp, dev_MemRef_A, dev_MemRef_alpha, dev_MemRef_B);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
; CODE: {
; CODE-NEXT: dim3 k1_dimBlock(16, 32);
; CODE-NEXT: dim3 k1_dimGrid(128, 128);
; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> ();
; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_tmp, dev_MemRef_D, dev_MemRef_beta, dev_MemRef_C);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }