forked from OSchip/llvm-project
GPGPU: collect array references
Initialize the list of references to a GPU array to ensure that the arrays that need to be passed to kernel calls are computed correctly. Furthermore, the very same information is also necessary to compute synchronization correctly. As the functionality to compute these references is already available, what is left for us to do is only to connect the necessary functionality to compute array reference information. llvm-svn: 275798
This commit is contained in:
parent
1fb9b64dc0
commit
b9fc860a57
|
@ -450,6 +450,8 @@ public:
|
|||
|
||||
setArrayBounds(PPCGArray, Array);
|
||||
i++;
|
||||
|
||||
collect_references(PPCGProg, &PPCGArray);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ static const char *get_outer_array_name(__isl_keep isl_map *access)
|
|||
/* Collect all references to the given array and store pointers to them
|
||||
* in array->refs.
|
||||
*/
|
||||
static void collect_references(struct gpu_prog *prog,
|
||||
void collect_references(struct gpu_prog *prog,
|
||||
struct gpu_array_info *array)
|
||||
{
|
||||
int i;
|
||||
|
|
|
@ -371,4 +371,5 @@ __isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
|
|||
__isl_take isl_schedule *schedule);
|
||||
|
||||
__isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog);
|
||||
void collect_references(struct gpu_prog *prog, struct gpu_array_info *array);
|
||||
#endif
|
||||
|
|
|
@ -69,7 +69,7 @@
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(16, 32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32, 32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(4);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (c0);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, c0);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(16);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (p_0, p_1);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_Q, p_0, p_1);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -28,14 +28,14 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k1_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k1_dimGrid(p_1 <= -1048034 ? 32768 : -p_1 + floord(31 * p_1 + 30, 32) + 16);
|
||||
; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (p_0, p_1);
|
||||
; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
; CODE: {
|
||||
; CODE-NEXT: dim3 k2_dimBlock(16, 32);
|
||||
; CODE-NEXT: dim3 k2_dimGrid(16, p_1 <= -7650 ? 256 : -p_1 + floord(31 * p_1 + 30, 32) + 16);
|
||||
; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (p_0, p_1);
|
||||
; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -53,11 +53,13 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0
|
|||
|
||||
; CODE: # kernel1
|
||||
; CODE-NEXT: for (int c0 = 0; c0 <= (-p_1 - 32 * b0 + 510) / 1048576; c0 += 1)
|
||||
; CODE-NEXT: if (p_1 + 32 * b0 + t0 + 1048576 * c0 <= 510) {
|
||||
; CODE-NEXT: Stmt_for_body35(32 * b0 + t0 + 1048576 * c0);
|
||||
; CODE-NEXT: for (int c1 = 0; c1 <= 15; c1 += 1)
|
||||
; CODE-NEXT: for (int c1 = 0; c1 <= 15; c1 += 1) {
|
||||
; CODE-NEXT: if (p_1 + 32 * b0 + t0 + 1048576 * c0 <= 510 && c1 == 0)
|
||||
; CODE-NEXT: Stmt_for_body35(32 * b0 + t0 + 1048576 * c0);
|
||||
; CODE-NEXT: if (p_1 + 32 * b0 + t0 + 1048576 * c0 <= 510)
|
||||
; CODE-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
|
||||
; CODE-NEXT: Stmt_for_body42(32 * b0 + t0 + 1048576 * c0, 32 * c1 + c3);
|
||||
; CODE-NEXT: sync0();
|
||||
; CODE-NEXT: }
|
||||
|
||||
; CODE: # kernel2
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_b);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -63,7 +63,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_b);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -113,7 +113,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_b);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -163,7 +163,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_b);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -213,7 +213,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_b);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -263,7 +263,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_b);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -312,7 +312,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -361,7 +361,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -410,7 +410,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -459,7 +459,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -508,7 +508,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -557,7 +557,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -606,7 +606,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -655,7 +655,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -704,7 +704,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
@ -753,7 +753,7 @@ bb7: ; preds = %bb1
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(32);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
|
|
@ -40,14 +40,14 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(16, 32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(128, 128);
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_tmp, dev_MemRef_A, dev_MemRef_alpha, dev_MemRef_B);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
; CODE: {
|
||||
; CODE-NEXT: dim3 k1_dimBlock(16, 32);
|
||||
; CODE-NEXT: dim3 k1_dimGrid(128, 128);
|
||||
; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> ();
|
||||
; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_tmp, dev_MemRef_D, dev_MemRef_beta, dev_MemRef_C);
|
||||
; CODE-NEXT: cudaCheckKernel();
|
||||
; CODE-NEXT: }
|
||||
|
||||
|
|
Loading…
Reference in New Issue