2016-05-05 16:46:22 +08:00
|
|
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
|
|
|
|
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
|
|
|
|
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
|
|
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
|
|
|
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
|
|
|
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
|
2017-12-30 02:07:07 +08:00
|
|
|
|
|
|
|
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
|
|
|
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
|
|
|
|
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
|
|
|
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
|
|
|
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
|
|
|
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
|
|
|
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
|
2016-05-05 16:46:22 +08:00
|
|
|
// expected-no-diagnostics
|
|
|
|
// It doesn't pass on win32.
|
2016-10-21 07:30:39 +08:00
|
|
|
// REQUIRES: shell
|
2016-05-05 16:46:22 +08:00
|
|
|
#ifndef ARRAY
|
|
|
|
#ifndef HEADER
|
|
|
|
#define HEADER
|
|
|
|
|
|
|
|
template <class T>
|
|
|
|
struct S {
|
|
|
|
T f;
|
|
|
|
S(T a) : f(a) {}
|
|
|
|
S() : f() {}
|
|
|
|
S(const S &s, T t = T()) : f(s.f + t) {}
|
|
|
|
operator T() { return T(); }
|
|
|
|
~S() {}
|
|
|
|
};
|
|
|
|
|
|
|
|
volatile double g;
|
|
|
|
|
2017-07-17 21:30:36 +08:00
|
|
|
// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* }
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double }
|
|
|
|
// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32]
|
|
|
|
// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]*, i{{[0-9]+}}* }
|
|
|
|
// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] }
|
|
|
|
// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 }
|
|
|
|
// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* }
|
|
|
|
// CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] }
|
|
|
|
// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] }
|
|
|
|
template <typename T>
|
|
|
|
T tmain() {
|
|
|
|
S<T> ttt;
|
|
|
|
S<T> test;
|
|
|
|
T t_var __attribute__((aligned(128))) = T();
|
|
|
|
T vec[] = {1, 2};
|
|
|
|
S<T> s_arr[] = {1, 2};
|
|
|
|
S<T> var(3);
|
|
|
|
#pragma omp taskloop lastprivate(t_var, vec, s_arr, s_arr, var, var)
|
|
|
|
for (int i = 0; i < 10; ++i) {
|
|
|
|
vec[0] = t_var;
|
|
|
|
s_arr[0] = var;
|
|
|
|
}
|
|
|
|
return T();
|
|
|
|
}
|
|
|
|
|
|
|
|
int main() {
|
|
|
|
static int sivar;
|
|
|
|
#ifdef LAMBDA
|
|
|
|
// LAMBDA: [[G:@.+]] = global double
|
|
|
|
// LAMBDA: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
|
|
|
|
// LAMBDA-LABEL: @main
|
|
|
|
// LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]](
|
|
|
|
[&]() {
|
|
|
|
// LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
|
2017-07-17 21:30:36 +08:00
|
|
|
// LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
|
2016-05-05 16:46:22 +08:00
|
|
|
// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
|
|
|
|
|
Do not always request an implicit taskgroup region inside the kmpc_taskloop function
Summary:
For the following code:
```
int i;
#pragma omp taskloop
for (i = 0; i < 100; ++i)
{}
#pragma omp taskloop nogroup
for (i = 0; i < 100; ++i)
{}
```
Clang emits the following LLVM IR:
```
...
call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 %0)
%2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %2, i32 1, i64* %8, i64* %9, i64 %13, i32 0, i32 0, i64 0, i8* null)
call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 %0)
...
%15 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..2 to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %15, i32 1, i64* %21, i64* %22, i64 %26, i32 0, i32 0, i64 0, i8* null)
```
The first set of instructions corresponds to the first taskloop construct. It is important to note that the implicit taskgroup region associated with the taskloop construct has been materialized in our IR: the `__kmpc_taskloop` occurs inside a taskgroup region. Note also that this taskgroup region does not exist in our second taskloop because we are using the `nogroup` clause.
The issue here is the 4th argument of the kmpc_taskloop call, starting from the end, is always a zero. Checking the LLVM OpenMP RT implementation, we see that this argument corresponds to the nogroup parameter:
```
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
int sched, kmp_uint64 grainsize, void *task_dup);
```
So basically we always tell to the RT to do another taskgroup region. For the first taskloop, this means that we create two taskgroup regions. For the second example, it means that despite the fact we had a nogroup clause we are going to have a taskgroup region, so we unnecessary wait until all descendant tasks have been executed.
Reviewers: ABataev
Reviewed By: ABataev
Subscribers: rogfer01, cfe-commits
Differential Revision: https://reviews.llvm.org/D53636
llvm-svn: 345180
2018-10-25 03:06:37 +08:00
|
|
|
// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*))
|
2016-05-05 16:46:22 +08:00
|
|
|
// LAMBDA: ret
|
|
|
|
#pragma omp taskloop lastprivate(g, sivar)
|
|
|
|
for (int i = 0; i < 10; ++i) {
|
|
|
|
// LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG_PTR:%.+]])
|
|
|
|
// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
|
|
|
|
// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
|
|
|
|
// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
|
|
|
// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
|
|
|
|
// LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
|
|
|
|
|
|
|
|
// LAMBDA: store double* %{{.+}}, double** %{{.+}},
|
|
|
|
// LAMBDA: define internal i32 [[TASK_ENTRY]](i32, %{{.+}}* noalias)
|
|
|
|
g = 1;
|
|
|
|
sivar = 11;
|
|
|
|
// LAMBDA: store double 1.0{{.+}}, double* %{{.+}},
|
|
|
|
// LAMBDA: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}},
|
|
|
|
// LAMBDA: call void [[INNER_LAMBDA]](%
|
|
|
|
// LAMBDA: icmp ne i32 %{{.+}}, 0
|
|
|
|
// LAMBDA: br i1
|
|
|
|
// LAMBDA: load double, double* %
|
|
|
|
// LAMBDA: store volatile double %
|
|
|
|
// LAMBDA: load i32, i32* %
|
|
|
|
// LAMBDA: store i32 %
|
|
|
|
// LAMBDA: ret
|
|
|
|
[&]() {
|
|
|
|
g = 2;
|
|
|
|
sivar = 22;
|
|
|
|
}();
|
|
|
|
}
|
|
|
|
}();
|
|
|
|
return 0;
|
|
|
|
#elif defined(BLOCKS)
|
|
|
|
// BLOCKS: [[G:@.+]] = global double
|
|
|
|
// BLOCKS-LABEL: @main
|
|
|
|
// BLOCKS: call void {{%.+}}(i8
|
|
|
|
^{
|
|
|
|
// BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
|
2017-07-17 21:30:36 +08:00
|
|
|
// BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
|
2016-05-05 16:46:22 +08:00
|
|
|
// BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
|
Do not always request an implicit taskgroup region inside the kmpc_taskloop function
Summary:
For the following code:
```
int i;
#pragma omp taskloop
for (i = 0; i < 100; ++i)
{}
#pragma omp taskloop nogroup
for (i = 0; i < 100; ++i)
{}
```
Clang emits the following LLVM IR:
```
...
call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 %0)
%2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %2, i32 1, i64* %8, i64* %9, i64 %13, i32 0, i32 0, i64 0, i8* null)
call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 %0)
...
%15 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..2 to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %15, i32 1, i64* %21, i64* %22, i64 %26, i32 0, i32 0, i64 0, i8* null)
```
The first set of instructions corresponds to the first taskloop construct. It is important to note that the implicit taskgroup region associated with the taskloop construct has been materialized in our IR: the `__kmpc_taskloop` occurs inside a taskgroup region. Note also that this taskgroup region does not exist in our second taskloop because we are using the `nogroup` clause.
The issue here is the 4th argument of the kmpc_taskloop call, starting from the end, is always a zero. Checking the LLVM OpenMP RT implementation, we see that this argument corresponds to the nogroup parameter:
```
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
int sched, kmp_uint64 grainsize, void *task_dup);
```
So basically we always tell to the RT to do another taskgroup region. For the first taskloop, this means that we create two taskgroup regions. For the second example, it means that despite the fact we had a nogroup clause we are going to have a taskgroup region, so we unnecessary wait until all descendant tasks have been executed.
Reviewers: ABataev
Reviewed By: ABataev
Subscribers: rogfer01, cfe-commits
Differential Revision: https://reviews.llvm.org/D53636
llvm-svn: 345180
2018-10-25 03:06:37 +08:00
|
|
|
// BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*))
|
2016-05-05 16:46:22 +08:00
|
|
|
// BLOCKS: ret
|
|
|
|
#pragma omp taskloop lastprivate(g, sivar)
|
|
|
|
for (int i = 0; i < 10; ++i) {
|
|
|
|
// BLOCKS: define {{.+}} void {{@.+}}(i8*
|
|
|
|
// BLOCKS-NOT: [[G]]{{[[^:word:]]}}
|
|
|
|
// BLOCKS: store double 2.0{{.+}}, double*
|
|
|
|
// BLOCKS-NOT: [[G]]{{[[^:word:]]}}
|
|
|
|
// BLOCKS-NOT: [[ISVAR]]{{[[^:word:]]}}
|
|
|
|
// BLOCKS: store i{{[0-9]+}} 22, i{{[0-9]+}}*
|
|
|
|
// BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}}
|
|
|
|
// BLOCKS: ret
|
|
|
|
|
|
|
|
// BLOCKS: store double* %{{.+}}, double** %{{.+}},
|
|
|
|
// BLOCKS: store i{{[0-9]+}}* %{{.+}}, i{{[0-9]+}}** %{{.+}},
|
|
|
|
// BLOCKS: define internal i32 [[TASK_ENTRY]](i32, %{{.+}}* noalias)
|
|
|
|
g = 1;
|
|
|
|
sivar = 11;
|
|
|
|
// BLOCKS: store double 1.0{{.+}}, double* %{{.+}},
|
|
|
|
// BLOCKS-NOT: [[G]]{{[[^:word:]]}}
|
|
|
|
// BLOCKS: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}},
|
|
|
|
// BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}}
|
|
|
|
// BLOCKS: call void {{%.+}}(i8
|
|
|
|
// BLOCKS: icmp ne i32 %{{.+}}, 0
|
|
|
|
// BLOCKS: br i1
|
|
|
|
// BLOCKS: load double, double* %
|
|
|
|
// BLOCKS: store volatile double %
|
|
|
|
// BLOCKS: load i32, i32* %
|
|
|
|
// BLOCKS: store i32 %
|
|
|
|
^{
|
|
|
|
g = 2;
|
|
|
|
sivar = 22;
|
|
|
|
}();
|
|
|
|
}
|
|
|
|
}();
|
|
|
|
return 0;
|
|
|
|
#else
|
|
|
|
S<double> ttt;
|
|
|
|
S<double> test;
|
|
|
|
int t_var = 0;
|
|
|
|
int vec[] = {1, 2};
|
|
|
|
S<double> s_arr[] = {1, 2};
|
|
|
|
S<double> var(3);
|
|
|
|
#pragma omp taskloop lastprivate(var, t_var, s_arr, vec, s_arr, var, sivar)
|
|
|
|
for (int i = 0; i < 10; ++i) {
|
|
|
|
vec[0] = t_var;
|
|
|
|
s_arr[0] = var;
|
|
|
|
sivar = 33;
|
|
|
|
}
|
|
|
|
return tmain<int>();
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
// CHECK: [[SIVAR:.+]] = internal global i{{[0-9]+}} 0,
|
|
|
|
// CHECK: define i{{[0-9]+}} @main()
|
|
|
|
// CHECK: alloca [[S_DOUBLE_TY]],
|
|
|
|
// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]],
|
|
|
|
// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32,
|
|
|
|
// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32],
|
|
|
|
// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]],
|
|
|
|
// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]],
|
|
|
|
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]])
|
|
|
|
|
2018-10-15 23:43:00 +08:00
|
|
|
// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR:@.+]]([[S_DOUBLE_TY]]* [[TEST]])
|
2016-05-05 16:46:22 +08:00
|
|
|
|
|
|
|
// Store original variables in capture struct.
|
|
|
|
// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
|
|
|
// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]],
|
|
|
|
// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
|
|
|
|
// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]],
|
|
|
|
// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
|
|
|
|
// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[S_ARR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]],
|
|
|
|
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
|
|
|
|
// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR]], [[S_DOUBLE_TY]]** [[VAR_REF]],
|
|
|
|
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
|
|
|
|
// CHECK: store i{{[0-9]+}}* [[SIVAR]], i{{[0-9]+}}** [[SIVAR_REF]],
|
|
|
|
|
|
|
|
// Allocate task.
|
|
|
|
// Returns struct kmp_task_t {
|
|
|
|
// [[KMP_TASK_T]] task_data;
|
|
|
|
// [[KMP_TASK_MAIN_TY]] privates;
|
|
|
|
// };
|
2017-07-17 21:30:36 +08:00
|
|
|
// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]*
|
|
|
|
|
|
|
|
// Fill kmp_task_t->shareds by copying from original capture argument.
|
|
|
|
// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
|
|
|
// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
|
|
|
// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
|
|
|
|
// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8*
|
Change memcpy/memove/memset to have dest and source alignment attributes (Step 1).
Summary:
Upstream LLVM is changing the the prototypes of the @llvm.memcpy/memmove/memset
intrinsics. This change updates the Clang tests for this change.
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change removes the alignment argument in favour of placing the alignment
attribute on the source and destination pointers of the memory intrinsic call.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
At this time the source and destination alignments must be the same (Step 1).
Step 2 of the change, to be landed shortly, will relax that contraint and allow
the source and destination to have different alignments.
llvm-svn: 322964
2018-01-20 01:12:54 +08:00
|
|
|
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false)
|
2016-05-05 16:46:22 +08:00
|
|
|
|
|
|
|
// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
|
|
|
|
// Also copy address of private copy to the corresponding shareds reference.
|
|
|
|
// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
|
|
|
|
|
|
|
|
// Constructors for s_arr and var.
|
|
|
|
// s_arr;
|
|
|
|
// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
2018-10-15 23:43:00 +08:00
|
|
|
// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* [[S_ARR_CUR:%[^,]+]])
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1
|
|
|
|
// CHECK: icmp eq
|
|
|
|
// CHECK: br i1
|
|
|
|
|
|
|
|
// var;
|
|
|
|
// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
|
2018-10-15 23:43:00 +08:00
|
|
|
// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]])
|
2016-05-05 16:46:22 +08:00
|
|
|
|
|
|
|
// t_var;
|
|
|
|
// vec;
|
|
|
|
// sivar;
|
|
|
|
|
|
|
|
// Provide pointer to destructor function, which will destroy private variables at the end of the task.
|
|
|
|
// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)**
|
|
|
|
// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]],
|
2016-05-05 16:46:22 +08:00
|
|
|
|
|
|
|
// Start task.
|
Do not always request an implicit taskgroup region inside the kmpc_taskloop function
Summary:
For the following code:
```
int i;
#pragma omp taskloop
for (i = 0; i < 100; ++i)
{}
#pragma omp taskloop nogroup
for (i = 0; i < 100; ++i)
{}
```
Clang emits the following LLVM IR:
```
...
call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 %0)
%2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %2, i32 1, i64* %8, i64* %9, i64 %13, i32 0, i32 0, i64 0, i8* null)
call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 %0)
...
%15 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..2 to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %15, i32 1, i64* %21, i64* %22, i64 %26, i32 0, i32 0, i64 0, i8* null)
```
The first set of instructions corresponds to the first taskloop construct. It is important to note that the implicit taskgroup region associated with the taskloop construct has been materialized in our IR: the `__kmpc_taskloop` occurs inside a taskgroup region. Note also that this taskgroup region does not exist in our second taskloop because we are using the `nogroup` clause.
The issue here is the 4th argument of the kmpc_taskloop call, starting from the end, is always a zero. Checking the LLVM OpenMP RT implementation, we see that this argument corresponds to the nogroup parameter:
```
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
int sched, kmp_uint64 grainsize, void *task_dup);
```
So basically we always tell to the RT to do another taskgroup region. For the first taskloop, this means that we create two taskgroup regions. For the second example, it means that despite the fact we had a nogroup clause we are going to have a taskgroup region, so we unnecessary wait until all descendant tasks have been executed.
Reviewers: ABataev
Reviewed By: ABataev
Subscribers: rogfer01, cfe-commits
Differential Revision: https://reviews.llvm.org/D53636
llvm-svn: 345180
2018-10-25 03:06:37 +08:00
|
|
|
// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*))
|
2016-05-05 16:46:22 +08:00
|
|
|
|
|
|
|
// CHECK: = call i{{.+}} [[TMAIN_INT:@.+]]()
|
|
|
|
|
|
|
|
// No destructors must be called for private copies of s_arr and var.
|
|
|
|
// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
|
|
|
|
// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
|
|
|
|
// CHECK: call void [[S_DOUBLE_TY_DESTR:@.+]]([[S_DOUBLE_TY]]*
|
|
|
|
// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
|
|
|
|
// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
|
|
|
|
// CHECK: ret
|
|
|
|
//
|
|
|
|
|
|
|
|
// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias, [[S_DOUBLE_TY]]** noalias, i32** noalias, [2 x [[S_DOUBLE_TY]]]** noalias, [2 x i32]** noalias, i32** noalias)
|
|
|
|
// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]**
|
|
|
|
// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0
|
|
|
|
// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}},
|
|
|
|
// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]],
|
|
|
|
// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1
|
|
|
|
// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}},
|
|
|
|
// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]],
|
|
|
|
// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2
|
|
|
|
// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}},
|
|
|
|
// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]],
|
|
|
|
// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3
|
|
|
|
// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}},
|
|
|
|
// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]],
|
|
|
|
// CHECK: [[PRIV_SIVAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 4
|
|
|
|
// CHECK: [[ARG5:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** %{{.+}},
|
|
|
|
// CHECK: store i{{[0-9]+}}* [[PRIV_SIVAR]], i{{[0-9]+}}** [[ARG5]],
|
|
|
|
// CHECK: ret void
|
|
|
|
|
|
|
|
// CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_MAIN_TY]]* noalias)
|
|
|
|
|
|
|
|
// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*,
|
|
|
|
// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*,
|
|
|
|
// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*,
|
|
|
|
// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*,
|
|
|
|
// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*,
|
|
|
|
// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]],
|
|
|
|
// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]],
|
|
|
|
|
|
|
|
// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]])
|
|
|
|
|
|
|
|
// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]],
|
|
|
|
// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]],
|
|
|
|
// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]],
|
|
|
|
// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]],
|
|
|
|
// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]],
|
|
|
|
|
|
|
|
// Privates actually are used.
|
|
|
|
// CHECK-DAG: [[PRIV_VAR]]
|
|
|
|
// CHECK-DAG: [[PRIV_T_VAR]]
|
|
|
|
// CHECK-DAG: [[PRIV_S_ARR]]
|
|
|
|
// CHECK-DAG: [[PRIV_VEC]]
|
|
|
|
// CHECK-DAG: [[PRIV_SIVAR]]
|
|
|
|
|
|
|
|
// CHECK: icmp ne i32 %{{.+}}, 0
|
|
|
|
// CHECK-NEXT: br i1
|
|
|
|
// CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8*
|
|
|
|
// CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8*
|
Change memcpy/memove/memset to have dest and source alignment attributes (Step 1).
Summary:
Upstream LLVM is changing the the prototypes of the @llvm.memcpy/memmove/memset
intrinsics. This change updates the Clang tests for this change.
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change removes the alignment argument in favour of placing the alignment
attribute on the source and destination pointers of the memory intrinsic call.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
At this time the source and destination alignments must be the same (Step 1).
Step 2 of the change, to be landed shortly, will relax that contraint and allow
the source and destination to have different alignments.
llvm-svn: 322964
2018-01-20 01:12:54 +08:00
|
|
|
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK: load i32, i32* %
|
|
|
|
// CHECK: store i32 %{{.+}}, i32* %
|
|
|
|
// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %
|
|
|
|
// CHECK: phi [[S_DOUBLE_TY]]*
|
|
|
|
// CHECK: phi [[S_DOUBLE_TY]]*
|
Change memcpy/memove/memset to have dest and source alignment attributes (Step 1).
Summary:
Upstream LLVM is changing the the prototypes of the @llvm.memcpy/memmove/memset
intrinsics. This change updates the Clang tests for this change.
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change removes the alignment argument in favour of placing the alignment
attribute on the source and destination pointers of the memory intrinsic call.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
At this time the source and destination alignments must be the same (Step 1).
Step 2 of the change, to be landed shortly, will relax that contraint and allow
the source and destination to have different alignments.
llvm-svn: 322964
2018-01-20 01:12:54 +08:00
|
|
|
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK: icmp eq [[S_DOUBLE_TY]]* %
|
|
|
|
// CHECK-NEXT: br i1
|
|
|
|
// CHECK: bitcast [2 x i32]* %{{.+}} to i8*
|
|
|
|
// CHECK: bitcast [2 x i32]* %{{.+}} to i8*
|
Change memcpy/memove/memset to have dest and source alignment attributes (Step 1).
Summary:
Upstream LLVM is changing the the prototypes of the @llvm.memcpy/memmove/memset
intrinsics. This change updates the Clang tests for this change.
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change removes the alignment argument in favour of placing the alignment
attribute on the source and destination pointers of the memory intrinsic call.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
At this time the source and destination alignments must be the same (Step 1).
Step 2 of the change, to be landed shortly, will relax that contraint and allow
the source and destination to have different alignments.
llvm-svn: 322964
2018-01-20 01:12:54 +08:00
|
|
|
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK: load i32, i32* %
|
|
|
|
// CHECK: store i32 %{{.+}}, i32* %
|
|
|
|
// CHECK: br label
|
|
|
|
// CHECK: ret
|
|
|
|
|
|
|
|
// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)
|
|
|
|
// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 0
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* %{{.+}}, i32 0, i32 8
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK: load i32, i32* %
|
|
|
|
// CHECK: store i32 %{{.+}}, i32* %
|
|
|
|
// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1
|
|
|
|
// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0
|
|
|
|
// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0
|
|
|
|
// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2
|
|
|
|
// CHECK: br label %
|
|
|
|
|
|
|
|
// CHECK: phi [[S_DOUBLE_TY]]*
|
|
|
|
// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]*
|
|
|
|
// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 1
|
|
|
|
// CHECK: icmp eq [[S_DOUBLE_TY]]* %
|
|
|
|
// CHECK: br i1 %
|
|
|
|
|
|
|
|
// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1
|
|
|
|
// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]*
|
|
|
|
// CHECK: ret void
|
|
|
|
|
|
|
|
// CHECK: define internal i32 [[DESTRUCTORS]](i32, [[KMP_TASK_MAIN_TY]]* noalias)
|
|
|
|
// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
|
|
|
|
// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
|
|
|
|
// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
|
|
|
|
// CHECK: call {{.*}} [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]])
|
|
|
|
// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
|
|
|
|
// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2
|
|
|
|
// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1
|
|
|
|
// CHECK: call {{.*}} [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_S_ARR_ELEM_REF]])
|
|
|
|
// CHECK: icmp eq
|
|
|
|
// CHECK: br i1
|
|
|
|
// CHECK: ret i32
|
|
|
|
|
|
|
|
// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
|
|
|
|
// CHECK: alloca [[S_INT_TY]],
|
|
|
|
// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
|
|
|
|
// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, align 128
|
|
|
|
// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32],
|
|
|
|
// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]],
|
|
|
|
// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]],
|
|
|
|
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]])
|
|
|
|
|
2018-10-15 23:43:00 +08:00
|
|
|
// CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
|
2016-05-05 16:46:22 +08:00
|
|
|
|
|
|
|
// Store original variables in capture struct.
|
|
|
|
// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
|
|
|
// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]],
|
|
|
|
// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
|
|
|
|
// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]],
|
|
|
|
// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
|
|
|
|
// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_ADDR]], [2 x [[S_INT_TY]]]** [[S_ARR_REF]],
|
|
|
|
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
|
|
|
|
// CHECK: store [[S_INT_TY]]* [[VAR_ADDR]], [[S_INT_TY]]** [[VAR_REF]],
|
|
|
|
|
|
|
|
// Allocate task.
|
|
|
|
// Returns struct kmp_task_t {
|
|
|
|
// [[KMP_TASK_T_TY]] task_data;
|
|
|
|
// [[KMP_TASK_TMAIN_TY]] privates;
|
|
|
|
// };
|
2016-05-10 18:36:51 +08:00
|
|
|
// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 256, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]*
|
|
|
|
|
|
|
|
// Fill kmp_task_t->shareds by copying from original capture argument.
|
|
|
|
// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
|
|
|
// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
|
|
|
// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
|
|
|
|
// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8*
|
Change memcpy/memove/memset to have dest and source alignment attributes (Step 1).
Summary:
Upstream LLVM is changing the the prototypes of the @llvm.memcpy/memmove/memset
intrinsics. This change updates the Clang tests for this change.
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change removes the alignment argument in favour of placing the alignment
attribute on the source and destination pointers of the memory intrinsic call.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
At this time the source and destination alignments must be the same (Step 1).
Step 2 of the change, to be landed shortly, will relax that contraint and allow
the source and destination to have different alignments.
llvm-svn: 322964
2018-01-20 01:12:54 +08:00
|
|
|
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false)
|
2016-05-05 16:46:22 +08:00
|
|
|
|
|
|
|
// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
|
|
|
|
// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
|
|
|
|
|
|
|
|
// t_var;
|
|
|
|
// vec;
|
|
|
|
|
|
|
|
// Constructors for s_arr and var.
|
|
|
|
// a_arr;
|
|
|
|
// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
|
|
|
|
// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
|
|
|
|
// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2
|
2018-10-15 23:43:00 +08:00
|
|
|
// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* [[S_ARR_CUR:%[^,]+]])
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1
|
|
|
|
// CHECK: icmp eq
|
|
|
|
// CHECK: br i1
|
|
|
|
|
|
|
|
// var;
|
|
|
|
// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
|
2018-10-15 23:43:00 +08:00
|
|
|
// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]])
|
2016-05-05 16:46:22 +08:00
|
|
|
|
|
|
|
// Provide pointer to destructor function, which will destroy private variables at the end of the task.
|
|
|
|
// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)**
|
|
|
|
// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]],
|
2016-05-05 16:46:22 +08:00
|
|
|
|
|
|
|
// Start task.
|
Do not always request an implicit taskgroup region inside the kmpc_taskloop function
Summary:
For the following code:
```
int i;
#pragma omp taskloop
for (i = 0; i < 100; ++i)
{}
#pragma omp taskloop nogroup
for (i = 0; i < 100; ++i)
{}
```
Clang emits the following LLVM IR:
```
...
call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 %0)
%2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %2, i32 1, i64* %8, i64* %9, i64 %13, i32 0, i32 0, i64 0, i8* null)
call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 %0)
...
%15 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..2 to i32 (i32, i8*)*))
...
call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %15, i32 1, i64* %21, i64* %22, i64 %26, i32 0, i32 0, i64 0, i8* null)
```
The first set of instructions corresponds to the first taskloop construct. It is important to note that the implicit taskgroup region associated with the taskloop construct has been materialized in our IR: the `__kmpc_taskloop` occurs inside a taskgroup region. Note also that this taskgroup region does not exist in our second taskloop because we are using the `nogroup` clause.
The issue here is the 4th argument of the kmpc_taskloop call, starting from the end, is always a zero. Checking the LLVM OpenMP RT implementation, we see that this argument corresponds to the nogroup parameter:
```
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
int sched, kmp_uint64 grainsize, void *task_dup);
```
So basically we always tell to the RT to do another taskgroup region. For the first taskloop, this means that we create two taskgroup regions. For the second example, it means that despite the fact we had a nogroup clause we are going to have a taskgroup region, so we unnecessary wait until all descendant tasks have been executed.
Reviewers: ABataev
Reviewed By: ABataev
Subscribers: rogfer01, cfe-commits
Differential Revision: https://reviews.llvm.org/D53636
llvm-svn: 345180
2018-10-25 03:06:37 +08:00
|
|
|
// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*))
|
2016-05-05 16:46:22 +08:00
|
|
|
|
|
|
|
// No destructors must be called for private copies of s_arr and var.
|
|
|
|
// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
|
|
|
|
// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
|
|
|
|
// CHECK: call void [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
|
|
|
|
// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
|
|
|
|
// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
|
|
|
|
// CHECK: ret
|
|
|
|
//
|
|
|
|
|
|
|
|
// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias, i32** noalias, [2 x i32]** noalias, [2 x [[S_INT_TY]]]** noalias, [[S_INT_TY]]** noalias)
|
|
|
|
// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]**
|
|
|
|
// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0
|
|
|
|
// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}},
|
|
|
|
// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]],
|
|
|
|
// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1
|
|
|
|
// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}},
|
|
|
|
// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]],
|
|
|
|
// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2
|
|
|
|
// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}},
|
|
|
|
// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]],
|
|
|
|
// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3
|
|
|
|
// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}},
|
|
|
|
// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]],
|
|
|
|
// CHECK: ret void
|
|
|
|
|
|
|
|
// CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_TMAIN_TY]]* noalias)
|
|
|
|
// CHECK: alloca i32*,
|
|
|
|
// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*,
|
|
|
|
// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*,
|
|
|
|
// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
|
|
|
|
// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
|
|
|
|
// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]],
|
|
|
|
// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]],
|
|
|
|
// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]])
|
|
|
|
// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]],
|
|
|
|
// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]],
|
|
|
|
// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]],
|
|
|
|
// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]],
|
|
|
|
|
|
|
|
// Privates actually are used.
|
|
|
|
// CHECK-DAG: [[PRIV_VAR]]
|
|
|
|
// CHECK-DAG: [[PRIV_T_VAR]]
|
|
|
|
// CHECK-DAG: [[PRIV_S_ARR]]
|
|
|
|
// CHECK-DAG: [[PRIV_VEC]]
|
|
|
|
|
|
|
|
// CHECK: icmp ne i32 %{{.+}}, 0
|
|
|
|
// CHECK-NEXT: br i1
|
|
|
|
// CHECK: load i32, i32* %
|
|
|
|
// CHECK: store i32 %{{.+}}, i32* %
|
|
|
|
// CHECK: bitcast [2 x i32]* %{{.+}} to i8*
|
|
|
|
// CHECK: bitcast [2 x i32]* %{{.+}} to i8*
|
Change memcpy/memove/memset to have dest and source alignment attributes (Step 1).
Summary:
Upstream LLVM is changing the the prototypes of the @llvm.memcpy/memmove/memset
intrinsics. This change updates the Clang tests for this change.
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change removes the alignment argument in favour of placing the alignment
attribute on the source and destination pointers of the memory intrinsic call.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
At this time the source and destination alignments must be the same (Step 1).
Step 2 of the change, to be landed shortly, will relax that contraint and allow
the source and destination to have different alignments.
llvm-svn: 322964
2018-01-20 01:12:54 +08:00
|
|
|
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %
|
|
|
|
// CHECK: phi [[S_INT_TY]]*
|
|
|
|
// CHECK: phi [[S_INT_TY]]*
|
Change memcpy/memove/memset to have dest and source alignment attributes (Step 1).
Summary:
Upstream LLVM is changing the the prototypes of the @llvm.memcpy/memmove/memset
intrinsics. This change updates the Clang tests for this change.
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change removes the alignment argument in favour of placing the alignment
attribute on the source and destination pointers of the memory intrinsic call.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
At this time the source and destination alignments must be the same (Step 1).
Step 2 of the change, to be landed shortly, will relax that contraint and allow
the source and destination to have different alignments.
llvm-svn: 322964
2018-01-20 01:12:54 +08:00
|
|
|
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK: icmp eq [[S_INT_TY]]* %
|
|
|
|
// CHECK-NEXT: br i1
|
|
|
|
// CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8*
|
|
|
|
// CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8*
|
Change memcpy/memove/memset to have dest and source alignment attributes (Step 1).
Summary:
Upstream LLVM is changing the the prototypes of the @llvm.memcpy/memmove/memset
intrinsics. This change updates the Clang tests for this change.
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change removes the alignment argument in favour of placing the alignment
attribute on the source and destination pointers of the memory intrinsic call.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
At this time the source and destination alignments must be the same (Step 1).
Step 2 of the change, to be landed shortly, will relax that contraint and allow
the source and destination to have different alignments.
llvm-svn: 322964
2018-01-20 01:12:54 +08:00
|
|
|
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK: br label
|
|
|
|
// CHECK: ret
|
|
|
|
|
|
|
|
// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)
|
|
|
|
// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 0
|
2016-05-30 17:06:50 +08:00
|
|
|
// CHECK: getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* %{{.+}}, i32 0, i32 8
|
2016-05-05 16:46:22 +08:00
|
|
|
// CHECK: load i32, i32* %
|
|
|
|
// CHECK: store i32 %{{.+}}, i32* %
|
|
|
|
// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2
|
|
|
|
// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2
|
|
|
|
// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0
|
|
|
|
// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2
|
|
|
|
// CHECK: br label %
|
|
|
|
|
|
|
|
// CHECK: phi [[S_INT_TY]]*
|
|
|
|
// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]*
|
|
|
|
// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 1
|
|
|
|
// CHECK: icmp eq [[S_INT_TY]]* %
|
|
|
|
// CHECK: br i1 %
|
|
|
|
|
|
|
|
// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3
|
|
|
|
// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]*
|
|
|
|
// CHECK: ret void
|
|
|
|
|
|
|
|
// CHECK: define internal i32 [[DESTRUCTORS]](i32, [[KMP_TASK_TMAIN_TY]]* noalias)
|
|
|
|
// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
|
|
|
|
// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
|
|
|
|
// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
|
|
|
|
// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]])
|
|
|
|
// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
|
|
|
|
// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2
|
|
|
|
// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1
|
|
|
|
// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_S_ARR_ELEM_REF]])
|
|
|
|
// CHECK: icmp eq
|
|
|
|
// CHECK: br i1
|
|
|
|
// CHECK: ret i32
|
|
|
|
|
|
|
|
#endif
|
|
|
|
#else
|
|
|
|
// ARRAY-LABEL: array_func
|
|
|
|
struct St {
|
|
|
|
int a, b;
|
|
|
|
St() : a(0), b(0) {}
|
|
|
|
St(const St &) {}
|
|
|
|
~St() {}
|
|
|
|
};
|
|
|
|
|
|
|
|
void array_func(int n, float a[n], St s[2]) {
|
|
|
|
// ARRAY: call i8* @__kmpc_omp_task_alloc(
|
|
|
|
// ARRAY: call void @__kmpc_taskloop(
|
|
|
|
// ARRAY: store float** %{{.+}}, float*** %{{.+}},
|
|
|
|
// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}},
|
|
|
|
// ARRAY: icmp ne i32 %{{.+}}, 0
|
|
|
|
// ARRAY: store float* %{{.+}}, float** %{{.+}},
|
|
|
|
// ARRAY: store %struct.St* %{{.+}}, %struct.St** %{{.+}},
|
|
|
|
#pragma omp taskloop lastprivate(a, s)
|
|
|
|
for (int i = 0; i < 10; ++i)
|
|
|
|
;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|