[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The following code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
|
|
|
|
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
|
|
|
|
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
|
|
|
|
// expected-no-diagnostics
|
|
|
|
// REQUIRES: x86-registered-target
|
|
|
|
#ifndef HEADER
|
|
|
|
#define HEADER
|
|
|
|
|
|
|
|
// CHECK-LABEL: @main
|
|
|
|
// Entry point of the test. It contains three taskloop regions whose lowering
// is verified by the FileCheck directives interleaved with the code:
//   1. a taskloop with no clauses over a constant 10-iteration loop;
//   2. a taskloop with nogroup and grainsize(argc) over the same loop shape;
//   3. a taskloop with if(argc), shared(argc, argv), collapse(2) and
//      num_tasks(4) over a two-level loop nest.
// In each expected __kmpc_taskloop call the last four arguments are the
// nogroup flag, the schedule kind (0 = none, 1 = grainsize, 2 = num_tasks),
// the schedule value and the task duplication routine.
int main(int argc, char **argv) {
|
|
|
|
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]])
|
|
|
|
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*))
|
|
|
|
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
|
|
|
|
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
|
|
|
|
// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
|
|
|
|
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
|
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
|
|
|
|
// CHECK: store i64 0, i64* [[DOWN]],
|
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
|
|
|
|
// CHECK: store i64 9, i64* [[UP]],
|
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
|
|
|
|
// CHECK: store i64 1, i64* [[ST]],
|
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
|
|
|
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
|
|
|
|
// Region 1: no clauses, so the expected call above passes 0 for the nogroup
// flag, 0 for the schedule kind and 0 for the schedule value.
#pragma omp taskloop
|
|
|
|
for (int i = 0; i < 10; ++i)
|
|
|
|
;
|
|
|
|
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*))
|
|
|
|
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
|
|
|
|
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
|
|
|
|
// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
|
|
|
|
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
|
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
|
|
|
|
// CHECK: store i64 0, i64* [[DOWN]],
|
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
|
|
|
|
// CHECK: store i64 9, i64* [[UP]],
|
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
|
|
|
|
// CHECK: store i64 1, i64* [[ST]],
|
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop'
directive.
OpenMP 4.5 defines 'taskloop' directive and 2 additional clauses
'grainsize' and 'num_tasks' for this directive. Patch adds codegen for
these clauses.
These clauses are generated as arguments of the '__kmpc_taskloop'
libcall and are encoded the following way:
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
If 'grainsize' is specified, 'sched' argument must be set to '1' and
'grainsize' argument must be set to the value of the 'grainsize' clause.
If 'num_tasks' is specified, 'sched' argument must be set to '2' and
'grainsize' argument must be set to the value of the 'num_tasks' clause.
It is possible because these 2 clauses are mutually exclusive and can't
be used at the same time on the same directive.
If none of these clauses is specified, 'sched' argument must be set to
'0'.
llvm-svn: 267862
2016-04-28 17:15:06 +08:00
|
|
|
// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
|
|
|
|
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null)
|
|
|
|
#pragma omp taskloop nogroup grainsize(argc)
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
for (int i = 0; i < 10; ++i)
|
|
|
|
;
|
|
|
|
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
|
|
|
|
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
|
|
|
|
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
|
|
|
|
// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
|
|
|
|
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
|
|
|
|
// CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0
|
|
|
|
// CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32
|
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
|
|
|
|
// CHECK: store i64 0, i64* [[DOWN]],
|
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
|
|
|
|
// CHECK: store i64 %{{.+}}, i64* [[UP]],
|
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
|
|
|
|
// CHECK: store i64 1, i64* [[ST]],
|
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop'
directive.
OpenMP 4.5 defines 'taskloop' directive and 2 additional clauses
'grainsize' and 'num_tasks' for this directive. Patch adds codegen for
these clauses.
These clauses are generated as arguments of the '__kmpc_taskloop'
libcall and are encoded the following way:
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
If 'grainsize' is specified, 'sched' argument must be set to '1' and
'grainsize' argument must be set to the value of the 'grainsize' clause.
If 'num_tasks' is specified, 'sched' argument must be set to '2' and
'grainsize' argument must be set to the value of the 'num_tasks' clause.
It is possible because these 2 clauses are mutually exclusive and can't
be used at the same time on the same directive.
If none of these clauses is specified, 'sched' argument must be set to
'0'.
llvm-svn: 267862
2016-04-28 17:15:06 +08:00
|
|
|
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null)
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
// Outer induction variable of the collapsed loop nest below; declared outside
// the loop rather than in the for-init.
int i;
|
[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop'
directive.
OpenMP 4.5 defines 'taskloop' directive and 2 additional clauses
'grainsize' and 'num_tasks' for this directive. Patch adds codegen for
these clauses.
These clauses are generated as arguments of the '__kmpc_taskloop'
libcall and are encoded the following way:
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
If 'grainsize' is specified, 'sched' argument must be set to '1' and
'grainsize' argument must be set to the value of the 'grainsize' clause.
If 'num_tasks' is specified, 'sched' argument must be set to '2' and
'grainsize' argument must be set to the value of the 'num_tasks' clause.
It is possible because these 2 clauses are mutually exclusive and can't
be used at the same time on the same directive.
If none of these clauses is specified, 'sched' argument must be set to
'0'.
llvm-svn: 267862
2016-04-28 17:15:06 +08:00
|
|
|
#pragma omp taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(4)
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
for (i = 0; i < argc; ++i)
|
|
|
|
for (int j = argc; j < argv[argc][argc]; ++j)
|
|
|
|
;
|
|
|
|
}
|
|
|
|
|
|
|
|
// CHECK: define internal i32 [[TASK1]](
|
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
|
|
|
|
// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
|
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
|
|
|
|
// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
|
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
|
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
|
|
|
// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
|
|
|
|
// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
|
|
|
|
// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
|
|
|
|
// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
|
|
|
|
// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
|
|
|
|
// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
|
|
|
|
// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
|
|
|
|
// CHECK: br label
|
|
|
|
// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
|
|
|
|
// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
|
|
|
|
// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
|
|
|
|
// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
|
|
|
|
// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
|
|
|
|
// CHECK: load i32, i32* %
|
|
|
|
// CHECK: store i32 %
|
|
|
|
// CHECK: load i32, i32* %
|
|
|
|
// CHECK: add nsw i32 %{{.+}}, 1
|
|
|
|
// CHECK: store i32 %{{.+}}, i32* %
|
|
|
|
// CHECK: br label %
|
|
|
|
// CHECK: ret i32 0
|
|
|
|
|
|
|
|
// CHECK: define internal i32 [[TASK2]](
|
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
|
|
|
|
// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
|
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
|
|
|
|
// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
|
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
|
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
|
|
|
// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
|
|
|
|
// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
|
|
|
|
// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
|
|
|
|
// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
|
|
|
|
// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
|
|
|
|
// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
|
|
|
|
// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
|
|
|
|
// CHECK: br label
|
|
|
|
// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
|
|
|
|
// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
|
|
|
|
// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
|
|
|
|
// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
|
|
|
|
// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
|
|
|
|
// CHECK: load i32, i32* %
|
|
|
|
// CHECK: store i32 %
|
|
|
|
// CHECK: load i32, i32* %
|
|
|
|
// CHECK: add nsw i32 %{{.+}}, 1
|
|
|
|
// CHECK: store i32 %{{.+}}, i32* %
|
|
|
|
// CHECK: br label %
|
|
|
|
// CHECK: ret i32 0
|
|
|
|
|
|
|
|
// CHECK: define internal i32 [[TASK3]](
|
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
|
|
|
|
// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
|
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
|
|
|
|
// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
|
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
|
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
|
|
|
// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
|
|
|
|
// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
|
|
|
|
// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
|
|
|
|
// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
|
|
|
|
// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
|
|
|
|
// CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
|
|
|
|
// CHECK: br label
|
|
|
|
// CHECK: ret i32 0
|
|
|
|
|
|
|
|
// CHECK-LABEL: @_ZN1SC2Ei
|
|
|
|
// Struct whose constructor runs a taskloop that uses the data member a as the
// loop counter and as the num_tasks expression; the constructor parameter c is
// listed in the shared clause and bounds the loop. A file-scope object s is
// constructed with the argument 1 at the end of the definition.
struct S {
|
|
|
|
int a;
|
|
|
|
// Per the expected call further down, the num_tasks value is zero-extended to
// i64 and passed to __kmpc_taskloop with schedule kind 2.
S(int c) {
|
|
|
|
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]])
|
|
|
|
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*))
|
|
|
|
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
|
|
|
|
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
|
|
|
|
// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
|
|
|
|
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
|
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
|
|
|
|
// CHECK: store i64 0, i64* [[DOWN]],
|
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
|
|
|
|
// CHECK: store i64 %{{.+}}, i64* [[UP]],
|
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
|
|
|
|
// CHECK: store i64 1, i64* [[ST]],
|
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop'
directive.
OpenMP 4.5 defines 'taskloop' directive and 2 additional clauses
'grainsize' and 'num_tasks' for this directive. Patch adds codegen for
these clauses.
These clauses are generated as arguments of the '__kmpc_taskloop'
libcall and are encoded the following way:
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
If 'grainsize' is specified, 'sched' argument must be set to '1' and
'grainsize' argument must be set to the value of the 'grainsize' clause.
If 'num_tasks' is specified, 'sched' argument must be set to '2' and
'grainsize' argument must be set to the value of the 'num_tasks' clause.
It is possible because these 2 clauses are mutually exclusive and can't
be used at the same time on the same directive.
If none of these clauses is specified, 'sched' argument must be set to
'0'.
llvm-svn: 267862
2016-04-28 17:15:06 +08:00
|
|
|
// CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64
|
|
|
|
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 [[NUM_TASKS]], i8* null)
|
|
|
|
#pragma omp taskloop shared(c) num_tasks(a)
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
for (a = 0; a < c; ++a)
|
|
|
|
;
|
|
|
|
}
|
|
|
|
} s(1);
|
|
|
|
|
|
|
|
// CHECK: define internal i32 [[TASK4]](
|
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
|
|
|
|
// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
|
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
|
|
|
|
// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
|
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
|
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
|
|
|
// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
|
|
|
|
// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
|
|
|
|
// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
|
|
|
|
// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
|
|
|
|
// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
|
|
|
|
// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
|
|
|
|
// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
|
|
|
|
// CHECK: br label
|
|
|
|
// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
|
|
|
|
// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
|
|
|
|
// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
|
|
|
|
// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
|
|
|
|
// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
|
|
|
|
// CHECK: load i32, i32* %
|
|
|
|
// CHECK: store i32 %
|
|
|
|
// CHECK: load i32, i32* %
|
|
|
|
// CHECK: add nsw i32 %{{.+}}, 1
|
|
|
|
// CHECK: store i32 %{{.+}}, i32* %
|
|
|
|
// CHECK: br label %
|
|
|
|
// CHECK: ret i32 0
|
|
|
|
|
|
|
|
#endif
|