2016-04-28 20:14:51 +08:00
|
|
|
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
|
|
|
|
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
|
|
|
|
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
|
2017-12-30 02:07:07 +08:00
|
|
|
|
|
|
|
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s
|
|
|
|
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
|
|
|
|
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s
|
|
|
|
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
|
2016-04-28 20:14:51 +08:00
|
|
|
// expected-no-diagnostics
|
|
|
|
#ifndef HEADER
|
|
|
|
#define HEADER
|
|
|
|
|
|
|
|
// CHECK-LABEL: @main
|
|
|
|
// Test driver containing three `#pragma omp taskloop simd` loops. The
// FileCheck directive comments that sit before each pragma appear to describe
// the IR wanted for that construct (the task allocation, the bound/stride
// stores, and the __kmpc_taskloop call).
int main(int argc, char **argv) {
|
2018-04-17 01:59:34 +08:00
|
|
|
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[DEFLOC:@.+]])
|
|
|
|
// CHECK: call void @__kmpc_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
|
|
|
|
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*))
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
|
|
|
|
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i64 0, i64* [[DOWN]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i64 9, i64* [[UP]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i64 1, i64* [[ST]],
|
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
Do not always request an implicit taskgroup region inside the kmpc_taskloop function
Summary:
For the following code:
```
int i;
#pragma omp taskloop
for (i = 0; i < 100; ++i)
{}
#pragma omp taskloop nogroup
for (i = 0; i < 100; ++i)
{}
```
Clang emits the following LLVM IR:
```
...
call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 %0)
%2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %2, i32 1, i64* %8, i64* %9, i64 %13, i32 0, i32 0, i64 0, i8* null)
call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 %0)
...
%15 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..2 to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %15, i32 1, i64* %21, i64* %22, i64 %26, i32 0, i32 0, i64 0, i8* null)
```
The first set of instructions corresponds to the first taskloop construct. It is important to note that the implicit taskgroup region associated with the taskloop construct has been materialized in our IR: the `__kmpc_taskloop` occurs inside a taskgroup region. Note also that this taskgroup region does not exist in our second taskloop because we are using the `nogroup` clause.
The issue here is the 4th argument of the kmpc_taskloop call, starting from the end, is always a zero. Checking the LLVM OpenMP RT implementation, we see that this argument corresponds to the nogroup parameter:
```
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
int sched, kmp_uint64 grainsize, void *task_dup);
```
So basically we always tell to the RT to do another taskgroup region. For the first taskloop, this means that we create two taskgroup regions. For the second example, it means that despite the fact we had a nogroup clause we are going to have a taskgroup region, so we unnecessary wait until all descendant tasks have been executed.
Reviewers: ABataev
Reviewed By: ABataev
Subscribers: rogfer01, cfe-commits
Differential Revision: https://reviews.llvm.org/D53636
llvm-svn: 345180
2018-10-25 03:06:37 +08:00
|
|
|
// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, i8* null)
|
2018-04-17 01:59:34 +08:00
|
|
|
// CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
|
2016-05-10 18:36:51 +08:00
|
|
|
// Construct 1: only a priority(argc) clause, over the constant range
// 0 <= i < 10 with an empty body. No nogroup clause is given here.
#pragma omp taskloop simd priority(argc)
|
2016-04-28 20:14:51 +08:00
|
|
|
for (int i = 0; i < 10; ++i)
|
|
|
|
;
|
2018-04-17 01:59:34 +08:00
|
|
|
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*))
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
|
|
|
|
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i64 0, i64* [[DOWN]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i64 9, i64* [[UP]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i64 1, i64* [[ST]],
|
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
|
|
|
// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
|
Do not always request an implicit taskgroup region inside the kmpc_taskloop function
Summary:
For the following code:
```
int i;
#pragma omp taskloop
for (i = 0; i < 100; ++i)
{}
#pragma omp taskloop nogroup
for (i = 0; i < 100; ++i)
{}
```
Clang emits the following LLVM IR:
```
...
call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 %0)
%2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %2, i32 1, i64* %8, i64* %9, i64 %13, i32 0, i32 0, i64 0, i8* null)
call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 %0)
...
%15 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..2 to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %15, i32 1, i64* %21, i64* %22, i64 %26, i32 0, i32 0, i64 0, i8* null)
```
The first set of instructions corresponds to the first taskloop construct. It is important to note that the implicit taskgroup region associated with the taskloop construct has been materialized in our IR: the `__kmpc_taskloop` occurs inside a taskgroup region. Note also that this taskgroup region does not exist in our second taskloop because we are using the `nogroup` clause.
The issue here is the 4th argument of the kmpc_taskloop call, starting from the end, is always a zero. Checking the LLVM OpenMP RT implementation, we see that this argument corresponds to the nogroup parameter:
```
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
int sched, kmp_uint64 grainsize, void *task_dup);
```
So basically we always tell to the RT to do another taskgroup region. For the first taskloop, this means that we create two taskgroup regions. For the second example, it means that despite the fact we had a nogroup clause we are going to have a taskgroup region, so we unnecessary wait until all descendant tasks have been executed.
Reviewers: ABataev
Reviewed By: ABataev
Subscribers: rogfer01, cfe-commits
Differential Revision: https://reviews.llvm.org/D53636
llvm-svn: 345180
2018-10-25 03:06:37 +08:00
|
|
|
// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null)
|
2016-04-28 20:14:51 +08:00
|
|
|
// Construct 2: nogroup, a run-time grainsize taken from argc, and
// simdlen(4); same constant 0 <= i < 10 range and empty body as construct 1.
#pragma omp taskloop simd nogroup grainsize(argc) simdlen(4)
|
|
|
|
for (int i = 0; i < 10; ++i)
|
|
|
|
;
|
2018-04-17 01:59:34 +08:00
|
|
|
// CHECK: call void @__kmpc_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
|
|
|
|
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
|
|
|
|
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
|
|
|
|
// CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0
|
|
|
|
// CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i64 0, i64* [[DOWN]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i64 %{{.+}}, i64* [[UP]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i64 1, i64* [[ST]],
|
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
Do not always request an implicit taskgroup region inside the kmpc_taskloop function
Summary:
For the following code:
```
int i;
#pragma omp taskloop
for (i = 0; i < 100; ++i)
{}
#pragma omp taskloop nogroup
for (i = 0; i < 100; ++i)
{}
```
Clang emits the following LLVM IR:
```
...
call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 %0)
%2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %2, i32 1, i64* %8, i64* %9, i64 %13, i32 0, i32 0, i64 0, i8* null)
call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 %0)
...
%15 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..2 to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %15, i32 1, i64* %21, i64* %22, i64 %26, i32 0, i32 0, i64 0, i8* null)
```
The first set of instructions corresponds to the first taskloop construct. It is important to note that the implicit taskgroup region associated with the taskloop construct has been materialized in our IR: the `__kmpc_taskloop` occurs inside a taskgroup region. Note also that this taskgroup region does not exist in our second taskloop because we are using the `nogroup` clause.
The issue here is the 4th argument of the kmpc_taskloop call, starting from the end, is always a zero. Checking the LLVM OpenMP RT implementation, we see that this argument corresponds to the nogroup parameter:
```
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
int sched, kmp_uint64 grainsize, void *task_dup);
```
So basically we always tell to the RT to do another taskgroup region. For the first taskloop, this means that we create two taskgroup regions. For the second example, it means that despite the fact we had a nogroup clause we are going to have a taskgroup region, so we unnecessary wait until all descendant tasks have been executed.
Reviewers: ABataev
Reviewed By: ABataev
Subscribers: rogfer01, cfe-commits
Differential Revision: https://reviews.llvm.org/D53636
llvm-svn: 345180
2018-10-25 03:06:37 +08:00
|
|
|
// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 4, i8* null)
|
2018-04-17 01:59:34 +08:00
|
|
|
// CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
|
2016-04-28 20:14:51 +08:00
|
|
|
// Outer induction variable for construct 3; declared outside the loop nest.
int i;
|
|
|
|
// Construct 3: if(argc) condition, argc/argv listed as shared, two collapsed
// loops, a constant num_tasks(4) and safelen(32). Both loop bounds depend on
// run-time values (argc and argv[argc][argc]).
#pragma omp taskloop simd if(argc) shared(argc, argv) collapse(2) num_tasks(4) safelen(32)
|
|
|
|
for (i = 0; i < argc; ++i)
|
|
|
|
for (int j = argc; j < argv[argc][argc]; ++j)
|
|
|
|
;
|
|
|
|
}
|
|
|
|
|
|
|
|
// CHECK: define internal i32 [[TASK1]](
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
|
|
|
|
// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
|
|
|
|
// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
|
|
|
|
// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
|
|
|
|
// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
|
|
|
|
// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
|
|
|
|
// CHECK: br label
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
|
|
|
|
// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK: load i32, i32* %{{.*}}!llvm.access.group
|
|
|
|
// CHECK: store i32 %{{.*}}!llvm.access.group
|
|
|
|
// CHECK: load i32, i32* %{{.*}}!llvm.access.group
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: add nsw i32 %{{.+}}, 1
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.access.group
|
|
|
|
// CHECK: br label %{{.*}}!llvm.loop
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: ret i32 0
|
|
|
|
|
|
|
|
// CHECK: define internal i32 [[TASK2]](
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
|
|
|
|
// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
|
|
|
|
// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
|
|
|
|
// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
|
|
|
|
// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
|
|
|
|
// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
|
|
|
|
// CHECK: br label
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
|
|
|
|
// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK: load i32, i32* %{{.*}}!llvm.access.group
|
|
|
|
// CHECK: store i32 %{{.*}}!llvm.access.group
|
|
|
|
// CHECK: load i32, i32* %{{.*}}!llvm.access.group
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: add nsw i32 %{{.+}}, 1
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.access.group
|
|
|
|
// CHECK: br label %{{.*}}!llvm.loop
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: ret i32 0
|
|
|
|
|
|
|
|
// CHECK: define internal i32 [[TASK3]](
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
|
|
|
|
// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
|
|
|
|
// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
|
|
|
|
// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
|
|
|
|
// CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
|
|
|
|
// CHECK: br label
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK-NOT: !llvm.access.group
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: br label %{{.*}}!llvm.loop
|
|
|
|
// CHECK: ret i32 0
|
|
|
|
|
|
|
|
// CHECK-LABEL: @_ZN1SC2Ei
|
|
|
|
// Struct whose constructor contains a `taskloop simd` loop that uses the data
// member `a` as its induction variable and the constructor argument `c` as its
// upper bound. A namespace-scope instance `s(1)` is defined together with the
// struct.
struct S {
|
|
|
|
// Serves both as the loop counter and as the num_tasks operand below.
int a;
|
|
|
|
S(int c) {
|
2018-04-17 01:59:34 +08:00
|
|
|
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[DEFLOC:@.+]])
|
|
|
|
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*))
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
|
|
|
|
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i64 0, i64* [[DOWN]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i64 %{{.+}}, i64* [[UP]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i64 1, i64* [[ST]],
|
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
|
|
|
// CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64
|
Do not always request an implicit taskgroup region inside the kmpc_taskloop function
Summary:
For the following code:
```
int i;
#pragma omp taskloop
for (i = 0; i < 100; ++i)
{}
#pragma omp taskloop nogroup
for (i = 0; i < 100; ++i)
{}
```
Clang emits the following LLVM IR:
```
...
call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 %0)
%2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %2, i32 1, i64* %8, i64* %9, i64 %13, i32 0, i32 0, i64 0, i8* null)
call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 %0)
...
%15 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..2 to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %15, i32 1, i64* %21, i64* %22, i64 %26, i32 0, i32 0, i64 0, i8* null)
```
The first set of instructions corresponds to the first taskloop construct. It is important to note that the implicit taskgroup region associated with the taskloop construct has been materialized in our IR: the `__kmpc_taskloop` occurs inside a taskgroup region. Note also that this taskgroup region does not exist in our second taskloop because we are using the `nogroup` clause.
The issue here is the 4th argument of the kmpc_taskloop call, starting from the end, is always a zero. Checking the LLVM OpenMP RT implementation, we see that this argument corresponds to the nogroup parameter:
```
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
int sched, kmp_uint64 grainsize, void *task_dup);
```
So basically we always tell to the RT to do another taskgroup region. For the first taskloop, this means that we create two taskgroup regions. For the second example, it means that despite the fact we had a nogroup clause we are going to have a taskgroup region, so we unnecessary wait until all descendant tasks have been executed.
Reviewers: ABataev
Reviewed By: ABataev
Subscribers: rogfer01, cfe-commits
Differential Revision: https://reviews.llvm.org/D53636
llvm-svn: 345180
2018-10-25 03:06:37 +08:00
|
|
|
// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 [[NUM_TASKS]], i8* null)
|
2017-11-23 02:34:02 +08:00
|
|
|
// num_tasks is given the member `a` (a run-time value); simdlen(8) and
// safelen(64) are specified together on the same directive.
#pragma omp taskloop simd shared(c) num_tasks(a) simdlen(8) safelen(64)
|
2016-04-28 20:14:51 +08:00
|
|
|
for (a = 0; a < c; ++a)
|
|
|
|
;
|
|
|
|
}
|
|
|
|
} s(1);
|
|
|
|
|
|
|
|
// CHECK: define internal i32 [[TASK4]](
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
|
|
|
|
// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
|
|
|
|
// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
|
|
|
|
// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
|
|
|
|
// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
|
|
|
|
// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
|
|
|
|
// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
|
|
|
|
// CHECK: br label
|
|
|
|
// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
|
|
|
|
// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
|
|
|
|
// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
|
|
|
|
// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
|
|
|
|
// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
|
|
|
|
// CHECK: load i32, i32* %
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK-NOT: !llvm.access.group
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: store i32 %
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK-NOT: !llvm.access.group
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: load i32, i32* %
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK-NOT: !llvm.access.group
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: add nsw i32 %{{.+}}, 1
|
|
|
|
// CHECK: store i32 %{{.+}}, i32* %
|
2018-12-21 05:24:54 +08:00
|
|
|
// CHECK-NOT: !llvm.access.group
|
2016-04-28 20:14:51 +08:00
|
|
|
// CHECK: br label %{{.*}}!llvm.loop
|
|
|
|
// CHECK: ret i32 0
|
|
|
|
|
|
|
|
// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
|
|
|
|
// CHECK: !{!"llvm.loop.vectorize.width", i32 4}
|
|
|
|
// CHECK: !{!"llvm.loop.vectorize.width", i32 32}
|
2017-11-23 02:34:02 +08:00
|
|
|
// CHECK: !{!"llvm.loop.vectorize.width", i32 8}
|
2016-04-28 20:14:51 +08:00
|
|
|
|
|
|
|
#endif
|