2021-03-04 07:15:32 +08:00
|
|
|
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
|
|
|
// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
|
|
|
// expected-no-diagnostics
|
|
|
|
|
|
|
|
#ifndef HEADER
|
|
|
|
#define HEADER
|
|
|
|
|
|
|
|
struct MyIterator {
|
|
|
|
MyIterator(unsigned pos);
|
|
|
|
MyIterator(const MyIterator &other);
|
|
|
|
const MyIterator &operator=(const MyIterator &that);
|
|
|
|
MyIterator &operator++();
|
|
|
|
int operator-(const MyIterator &that) const;
|
|
|
|
MyIterator &operator+=(unsigned a);
|
|
|
|
MyIterator operator+(unsigned a) const;
|
|
|
|
bool operator==(const MyIterator &that) const;
|
|
|
|
bool operator!=(const MyIterator &that) const;
|
|
|
|
unsigned operator*() const;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct MyRange {
|
|
|
|
MyRange(int n);
|
|
|
|
|
|
|
|
MyIterator begin();
|
|
|
|
MyIterator end();
|
|
|
|
};
|
|
|
|
|
|
|
|
extern "C" void workshareloop_rangefor(float *a, float *b, float *c) {
|
|
|
|
#pragma omp for
|
|
|
|
for (unsigned i : MyRange(42)) {
|
|
|
|
a[i] = b[i] * c[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif // HEADER
|
|
|
|
// CHECK-LABEL: define {{[^@]+}}@workshareloop_rangefor
|
[OpenMP] Overhaul `declare target` handling
This patch fixes various issues with our prior `declare target` handling
and extends it to support `omp begin declare target` as well.
This started with PR49649 in mind, trying to provide a way for users to
avoid the "ref" global use introduced for globals with internal linkage.
From there it went down the rabbit hole, e.g., all variables, even
`nohost` ones, were emitted into the device code so it was impossible to
determine if "ref" was needed late in the game (based on the name only).
To make it really useful, `begin declare target` was needed as it can
carry the `device_type`. Not emitting variables eagerly had a ripple
effect. Finally, the precedence of the (explicit) declare target list
items needed to be taken into account, that meant we cannot just look
for any declare target attribute to make a decision. This caused the
handling of functions to require fixup as well.
I tried to clean up things while I was at it, e.g., we should not "parse
declarations and defintions" as part of OpenMP parsing, this will always
break at some point. Instead, we keep track what region we are in and
act on definitions and declarations instead, this is what we do for
declare variant and other begin/end directives already.
Highlights:
- new diagnosis for restrictions specificed in the standard,
- delayed emission of globals not mentioned in an explicit
list of a declare target,
- omission of `nohost` globals on the host and `host` globals on the
device,
- no explicit parsing of declarations in-between `omp [begin] declare
variant` and the corresponding end anymore, regular parsing instead,
- precedence for explicit mentions in `declare target` lists over
implicit mentions in the declaration-definition-seq, and
- `omp allocate` declarations will now replace an earlier emitted
global, if necessary.
---
Notes:
The patch is larger than I hoped but it turns out that most changes do
on their own lead to "inconsistent states", which seem less desirable
overall.
After working through this I feel the standard should remove the
explicit declare target forms as the delayed emission is horrible.
That said, while we delay things anyway, it seems to me we check too
often for the current status even though that is often not sufficient to
act upon. There seems to be a lot of duplication that can probably be
trimmed down. Eagerly emitting some things seems pretty weak as an
argument to keep so much logic around.
---
Reviewed By: ABataev
Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
|
|
|
// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]]) #[[ATTR0:[0-9]+]] {
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: entry:
|
|
|
|
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8
|
|
|
|
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8
|
|
|
|
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8
|
|
|
|
// CHECK-NEXT: [[__RANGE2:%.*]] = alloca %struct.MyRange*, align 8
|
|
|
|
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_MYRANGE:%.*]], align 1
|
|
|
|
// CHECK-NEXT: [[__BEGIN2:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1
|
|
|
|
// CHECK-NEXT: [[__END2:%.*]] = alloca [[STRUCT_MYITERATOR]], align 1
|
|
|
|
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
|
|
// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
|
|
|
|
// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1
|
|
|
|
// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i64, align 8
|
|
|
|
// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
|
|
|
|
// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i64, align 8
|
|
|
|
// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i64, align 8
|
|
|
|
// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i64, align 8
|
|
|
|
// CHECK-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
|
2021-04-10 15:41:16 +08:00
|
|
|
// CHECK-NEXT: call void @_ZN7MyRangeC1Ei(%struct.MyRange* nonnull dereferenceable(1) [[REF_TMP]], i32 42)
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: store %struct.MyRange* [[REF_TMP]], %struct.MyRange** [[__RANGE2]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load %struct.MyRange*, %struct.MyRange** [[__RANGE2]], align 8
|
[OpenMP] Overhaul `declare target` handling
This patch fixes various issues with our prior `declare target` handling
and extends it to support `omp begin declare target` as well.
This started with PR49649 in mind, trying to provide a way for users to
avoid the "ref" global use introduced for globals with internal linkage.
From there it went down the rabbit hole, e.g., all variables, even
`nohost` ones, were emitted into the device code so it was impossible to
determine if "ref" was needed late in the game (based on the name only).
To make it really useful, `begin declare target` was needed as it can
carry the `device_type`. Not emitting variables eagerly had a ripple
effect. Finally, the precedence of the (explicit) declare target list
items needed to be taken into account, that meant we cannot just look
for any declare target attribute to make a decision. This caused the
handling of functions to require fixup as well.
I tried to clean up things while I was at it, e.g., we should not "parse
declarations and defintions" as part of OpenMP parsing, this will always
break at some point. Instead, we keep track what region we are in and
act on definitions and declarations instead, this is what we do for
declare variant and other begin/end directives already.
Highlights:
- new diagnosis for restrictions specificed in the standard,
- delayed emission of globals not mentioned in an explicit
list of a declare target,
- omission of `nohost` globals on the host and `host` globals on the
device,
- no explicit parsing of declarations in-between `omp [begin] declare
variant` and the corresponding end anymore, regular parsing instead,
- precedence for explicit mentions in `declare target` lists over
implicit mentions in the declaration-definition-seq, and
- `omp allocate` declarations will now replace an earlier emitted
global, if necessary.
---
Notes:
The patch is larger than I hoped but it turns out that most changes do
on their own lead to "inconsistent states", which seem less desirable
overall.
After working through this I feel the standard should remove the
explicit declare target forms as the delayed emission is horrible.
That said, while we delay things anyway, it seems to me we check too
often for the current status even though that is often not sufficient to
act upon. There seems to be a lot of duplication that can probably be
trimmed down. Eagerly emitting some things seems pretty weak as an
argument to keep so much logic around.
---
Reviewed By: ABataev
Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
|
|
|
// CHECK-NEXT: call void @_ZN7MyRange5beginEv(%struct.MyIterator* sret([[STRUCT_MYITERATOR]]) align 1 [[__BEGIN2]], %struct.MyRange* nonnull dereferenceable(1) [[TMP0]])
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load %struct.MyRange*, %struct.MyRange** [[__RANGE2]], align 8
|
[OpenMP] Overhaul `declare target` handling
This patch fixes various issues with our prior `declare target` handling
and extends it to support `omp begin declare target` as well.
This started with PR49649 in mind, trying to provide a way for users to
avoid the "ref" global use introduced for globals with internal linkage.
From there it went down the rabbit hole, e.g., all variables, even
`nohost` ones, were emitted into the device code so it was impossible to
determine if "ref" was needed late in the game (based on the name only).
To make it really useful, `begin declare target` was needed as it can
carry the `device_type`. Not emitting variables eagerly had a ripple
effect. Finally, the precedence of the (explicit) declare target list
items needed to be taken into account, that meant we cannot just look
for any declare target attribute to make a decision. This caused the
handling of functions to require fixup as well.
I tried to clean up things while I was at it, e.g., we should not "parse
declarations and defintions" as part of OpenMP parsing, this will always
break at some point. Instead, we keep track what region we are in and
act on definitions and declarations instead, this is what we do for
declare variant and other begin/end directives already.
Highlights:
- new diagnosis for restrictions specificed in the standard,
- delayed emission of globals not mentioned in an explicit
list of a declare target,
- omission of `nohost` globals on the host and `host` globals on the
device,
- no explicit parsing of declarations in-between `omp [begin] declare
variant` and the corresponding end anymore, regular parsing instead,
- precedence for explicit mentions in `declare target` lists over
implicit mentions in the declaration-definition-seq, and
- `omp allocate` declarations will now replace an earlier emitted
global, if necessary.
---
Notes:
The patch is larger than I hoped but it turns out that most changes do
on their own lead to "inconsistent states", which seem less desirable
overall.
After working through this I feel the standard should remove the
explicit declare target forms as the delayed emission is horrible.
That said, while we delay things anyway, it seems to me we check too
often for the current status even though that is often not sufficient to
act upon. There seems to be a lot of duplication that can probably be
trimmed down. Eagerly emitting some things seems pretty weak as an
argument to keep so much logic around.
---
Reviewed By: ABataev
Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
|
|
|
// CHECK-NEXT: call void @_ZN7MyRange3endEv(%struct.MyIterator* sret([[STRUCT_MYITERATOR]]) align 1 [[__END2]], %struct.MyRange* nonnull dereferenceable(1) [[TMP1]])
|
2021-04-10 15:41:16 +08:00
|
|
|
// CHECK-NEXT: [[CALL:%.*]] = call i32 @_ZNK10MyIteratordeEv(%struct.MyIterator* nonnull dereferenceable(1) [[__BEGIN2]])
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: store i32 [[CALL]], i32* [[I]], align 4
|
|
|
|
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
|
|
|
|
// CHECK-NEXT: store %struct.MyIterator* [[__BEGIN2]], %struct.MyIterator** [[TMP2]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1
|
|
|
|
// CHECK-NEXT: store %struct.MyIterator* [[__END2]], %struct.MyIterator** [[TMP3]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED1]], i32 0, i32 0
|
2021-04-10 15:41:16 +08:00
|
|
|
// CHECK-NEXT: call void @_ZN10MyIteratorC1ERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[TMP4]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[__BEGIN2]])
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: call void @__captured_stmt(i64* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]])
|
|
|
|
// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i64, i64* [[DOTCOUNT_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]]
|
|
|
|
// CHECK: omp_loop.preheader:
|
|
|
|
// CHECK-NEXT: store i64 0, i64* [[P_LOWERBOUND]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[DOTCOUNT]], 1
|
|
|
|
// CHECK-NEXT: store i64 [[TMP5]], i64* [[P_UPPERBOUND]], align 8
|
|
|
|
// CHECK-NEXT: store i64 1, i64* [[P_STRIDE]], align 8
|
[OpenMP] Overhaul `declare target` handling
This patch fixes various issues with our prior `declare target` handling
and extends it to support `omp begin declare target` as well.
This started with PR49649 in mind, trying to provide a way for users to
avoid the "ref" global use introduced for globals with internal linkage.
From there it went down the rabbit hole, e.g., all variables, even
`nohost` ones, were emitted into the device code so it was impossible to
determine if "ref" was needed late in the game (based on the name only).
To make it really useful, `begin declare target` was needed as it can
carry the `device_type`. Not emitting variables eagerly had a ripple
effect. Finally, the precedence of the (explicit) declare target list
items needed to be taken into account, that meant we cannot just look
for any declare target attribute to make a decision. This caused the
handling of functions to require fixup as well.
I tried to clean up things while I was at it, e.g., we should not "parse
declarations and defintions" as part of OpenMP parsing, this will always
break at some point. Instead, we keep track what region we are in and
act on definitions and declarations instead, this is what we do for
declare variant and other begin/end directives already.
Highlights:
- new diagnosis for restrictions specificed in the standard,
- delayed emission of globals not mentioned in an explicit
list of a declare target,
- omission of `nohost` globals on the host and `host` globals on the
device,
- no explicit parsing of declarations in-between `omp [begin] declare
variant` and the corresponding end anymore, regular parsing instead,
- precedence for explicit mentions in `declare target` lists over
implicit mentions in the declaration-definition-seq, and
- `omp allocate` declarations will now replace an earlier emitted
global, if necessary.
---
Notes:
The patch is larger than I hoped but it turns out that most changes do
on their own lead to "inconsistent states", which seem less desirable
overall.
After working through this I feel the standard should remove the
explicit declare target forms as the delayed emission is horrible.
That said, while we delay things anyway, it seems to me we check too
often for the current status even though that is often not sufficient to
act upon. There seems to be a lot of duplication that can probably be
trimmed down. Eagerly emitting some things seems pretty weak as an
argument to keep so much logic around.
---
Reviewed By: ABataev
Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
|
|
|
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
|
|
|
|
// CHECK-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 1)
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[P_LOWERBOUND]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[P_UPPERBOUND]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], [[TMP6]]
|
|
|
|
// CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1
|
|
|
|
// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]]
|
|
|
|
// CHECK: omp_loop.header:
|
|
|
|
// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i64 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ]
|
|
|
|
// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]]
|
|
|
|
// CHECK: omp_loop.cond:
|
|
|
|
// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i64 [[OMP_LOOP_IV]], [[TMP9]]
|
|
|
|
// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]]
|
|
|
|
// CHECK: omp_loop.body:
|
|
|
|
// CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OMP_LOOP_IV]], [[TMP6]]
|
|
|
|
// CHECK-NEXT: call void @__captured_stmt.1(i32* [[I]], i64 [[TMP10]], %struct.anon.0* [[AGG_CAPTURED1]])
|
|
|
|
// CHECK-NEXT: [[TMP11:%.*]] = load float*, float** [[B_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4
|
|
|
|
// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP12]] to i64
|
|
|
|
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[IDXPROM]]
|
|
|
|
// CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4
|
|
|
|
// CHECK-NEXT: [[TMP14:%.*]] = load float*, float** [[C_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4
|
|
|
|
// CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP15]] to i64
|
|
|
|
// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM2]]
|
|
|
|
// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX3]], align 4
|
|
|
|
// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP13]], [[TMP16]]
|
|
|
|
// CHECK-NEXT: [[TMP17:%.*]] = load float*, float** [[A_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4
|
|
|
|
// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP18]] to i64
|
|
|
|
// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM4]]
|
|
|
|
// CHECK-NEXT: store float [[MUL]], float* [[ARRAYIDX5]], align 4
|
|
|
|
// CHECK-NEXT: br label [[OMP_LOOP_INC]]
|
|
|
|
// CHECK: omp_loop.inc:
|
|
|
|
// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i64 [[OMP_LOOP_IV]], 1
|
|
|
|
// CHECK-NEXT: br label [[OMP_LOOP_HEADER]]
|
|
|
|
// CHECK: omp_loop.exit:
|
[OpenMP] Overhaul `declare target` handling
This patch fixes various issues with our prior `declare target` handling
and extends it to support `omp begin declare target` as well.
This started with PR49649 in mind, trying to provide a way for users to
avoid the "ref" global use introduced for globals with internal linkage.
From there it went down the rabbit hole, e.g., all variables, even
`nohost` ones, were emitted into the device code so it was impossible to
determine if "ref" was needed late in the game (based on the name only).
To make it really useful, `begin declare target` was needed as it can
carry the `device_type`. Not emitting variables eagerly had a ripple
effect. Finally, the precedence of the (explicit) declare target list
items needed to be taken into account, that meant we cannot just look
for any declare target attribute to make a decision. This caused the
handling of functions to require fixup as well.
I tried to clean up things while I was at it, e.g., we should not "parse
declarations and defintions" as part of OpenMP parsing, this will always
break at some point. Instead, we keep track what region we are in and
act on definitions and declarations instead, this is what we do for
declare variant and other begin/end directives already.
Highlights:
- new diagnosis for restrictions specificed in the standard,
- delayed emission of globals not mentioned in an explicit
list of a declare target,
- omission of `nohost` globals on the host and `host` globals on the
device,
- no explicit parsing of declarations in-between `omp [begin] declare
variant` and the corresponding end anymore, regular parsing instead,
- precedence for explicit mentions in `declare target` lists over
implicit mentions in the declaration-definition-seq, and
- `omp allocate` declarations will now replace an earlier emitted
global, if necessary.
---
Notes:
The patch is larger than I hoped but it turns out that most changes do
on their own lead to "inconsistent states", which seem less desirable
overall.
After working through this I feel the standard should remove the
explicit declare target forms as the delayed emission is horrible.
That said, while we delay things anyway, it seems to me we check too
often for the current status even though that is often not sufficient to
act upon. There seems to be a lot of duplication that can probably be
trimmed down. Eagerly emitting some things seems pretty weak as an
argument to keep so much logic around.
---
Reviewed By: ABataev
Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
|
|
|
// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
|
|
|
|
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
|
|
|
// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]]
|
|
|
|
// CHECK: omp_loop.after:
|
|
|
|
// CHECK-NEXT: ret void
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// CHECK-LABEL: define {{[^@]+}}@__captured_stmt
|
[OpenMP] Overhaul `declare target` handling
This patch fixes various issues with our prior `declare target` handling
and extends it to support `omp begin declare target` as well.
This started with PR49649 in mind, trying to provide a way for users to
avoid the "ref" global use introduced for globals with internal linkage.
From there it went down the rabbit hole, e.g., all variables, even
`nohost` ones, were emitted into the device code so it was impossible to
determine if "ref" was needed late in the game (based on the name only).
To make it really useful, `begin declare target` was needed as it can
carry the `device_type`. Not emitting variables eagerly had a ripple
effect. Finally, the precedence of the (explicit) declare target list
items needed to be taken into account, that meant we cannot just look
for any declare target attribute to make a decision. This caused the
handling of functions to require fixup as well.
I tried to clean up things while I was at it, e.g., we should not "parse
declarations and defintions" as part of OpenMP parsing, this will always
break at some point. Instead, we keep track what region we are in and
act on definitions and declarations instead, this is what we do for
declare variant and other begin/end directives already.
Highlights:
- new diagnosis for restrictions specificed in the standard,
- delayed emission of globals not mentioned in an explicit
list of a declare target,
- omission of `nohost` globals on the host and `host` globals on the
device,
- no explicit parsing of declarations in-between `omp [begin] declare
variant` and the corresponding end anymore, regular parsing instead,
- precedence for explicit mentions in `declare target` lists over
implicit mentions in the declaration-definition-seq, and
- `omp allocate` declarations will now replace an earlier emitted
global, if necessary.
---
Notes:
The patch is larger than I hoped but it turns out that most changes do
on their own lead to "inconsistent states", which seem less desirable
overall.
After working through this I feel the standard should remove the
explicit declare target forms as the delayed emission is horrible.
That said, while we delay things anyway, it seems to me we check too
often for the current status even though that is often not sufficient to
act upon. There seems to be a lot of duplication that can probably be
trimmed down. Eagerly emitting some things seems pretty weak as an
argument to keep so much logic around.
---
Reviewed By: ABataev
Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
|
|
|
// CHECK-SAME: (i64* nonnull align 8 dereferenceable(8) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] {
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: entry:
|
|
|
|
// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca i64*, align 8
|
|
|
|
// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8
|
|
|
|
// CHECK-NEXT: [[DOTSTART:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1
|
|
|
|
// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca [[STRUCT_MYITERATOR]], align 1
|
|
|
|
// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i64, align 8
|
|
|
|
// CHECK-NEXT: store i64* [[DISTANCE]], i64** [[DISTANCE_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0
|
|
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load %struct.MyIterator*, %struct.MyIterator** [[TMP1]], align 8
|
2021-04-10 15:41:16 +08:00
|
|
|
// CHECK-NEXT: call void @_ZN10MyIteratorC1ERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[DOTSTART]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[TMP2]])
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1
|
|
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load %struct.MyIterator*, %struct.MyIterator** [[TMP3]], align 8
|
2021-04-10 15:41:16 +08:00
|
|
|
// CHECK-NEXT: call void @_ZN10MyIteratorC1ERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[DOTSTOP]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[TMP4]])
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: store i64 1, i64* [[DOTSTEP]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTSTEP]], align 8
|
|
|
|
// CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP5]], 0
|
|
|
|
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
|
|
|
// CHECK: cond.true:
|
2021-04-10 15:41:16 +08:00
|
|
|
// CHECK-NEXT: [[CALL:%.*]] = call i32 @_ZNK10MyIteratormiERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[DOTSTART]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[DOTSTOP]])
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64
|
|
|
|
// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTSTEP]], align 8
|
|
|
|
// CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[TMP6]]
|
|
|
|
// CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[CONV]], [[SUB]]
|
|
|
|
// CHECK-NEXT: br label [[COND_END:%.*]]
|
|
|
|
// CHECK: cond.false:
|
2021-04-10 15:41:16 +08:00
|
|
|
// CHECK-NEXT: [[CALL1:%.*]] = call i32 @_ZNK10MyIteratormiERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[DOTSTOP]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[DOTSTART]])
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[CALL1]] to i64
|
|
|
|
// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTSTEP]], align 8
|
|
|
|
// CHECK-NEXT: [[DIV3:%.*]] = udiv i64 [[CONV2]], [[TMP7]]
|
|
|
|
// CHECK-NEXT: br label [[COND_END]]
|
|
|
|
// CHECK: cond.end:
|
|
|
|
// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[DIV]], [[COND_TRUE]] ], [ [[DIV3]], [[COND_FALSE]] ]
|
|
|
|
// CHECK-NEXT: [[TMP8:%.*]] = load i64*, i64** [[DISTANCE_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: store i64 [[COND]], i64* [[TMP8]], align 8
|
|
|
|
// CHECK-NEXT: ret void
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1
|
[OpenMP] Overhaul `declare target` handling
This patch fixes various issues with our prior `declare target` handling
and extends it to support `omp begin declare target` as well.
This started with PR49649 in mind, trying to provide a way for users to
avoid the "ref" global use introduced for globals with internal linkage.
From there it went down the rabbit hole, e.g., all variables, even
`nohost` ones, were emitted into the device code so it was impossible to
determine if "ref" was needed late in the game (based on the name only).
To make it really useful, `begin declare target` was needed as it can
carry the `device_type`. Not emitting variables eagerly had a ripple
effect. Finally, the precedence of the (explicit) declare target list
items needed to be taken into account, that meant we cannot just look
for any declare target attribute to make a decision. This caused the
handling of functions to require fixup as well.
I tried to clean up things while I was at it, e.g., we should not "parse
declarations and defintions" as part of OpenMP parsing, this will always
break at some point. Instead, we keep track what region we are in and
act on definitions and declarations instead, this is what we do for
declare variant and other begin/end directives already.
Highlights:
- new diagnosis for restrictions specificed in the standard,
- delayed emission of globals not mentioned in an explicit
list of a declare target,
- omission of `nohost` globals on the host and `host` globals on the
device,
- no explicit parsing of declarations in-between `omp [begin] declare
variant` and the corresponding end anymore, regular parsing instead,
- precedence for explicit mentions in `declare target` lists over
implicit mentions in the declaration-definition-seq, and
- `omp allocate` declarations will now replace an earlier emitted
global, if necessary.
---
Notes:
The patch is larger than I hoped but it turns out that most changes do
on their own lead to "inconsistent states", which seem less desirable
overall.
After working through this I feel the standard should remove the
explicit declare target forms as the delayed emission is horrible.
That said, while we delay things anyway, it seems to me we check too
often for the current status even though that is often not sufficient to
act upon. There seems to be a lot of duplication that can probably be
trimmed down. Eagerly emitting some things seems pretty weak as an
argument to keep so much logic around.
---
Reviewed By: ABataev
Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
|
|
|
// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i64 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] {
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: entry:
|
|
|
|
// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca i32*, align 8
|
|
|
|
// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i64, align 8
|
|
|
|
// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8
|
|
|
|
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1
|
|
|
|
// CHECK-NEXT: store i32* [[LOOPVAR]], i32** [[LOOPVAR_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: store i64 [[LOGICAL]], i64* [[LOGICAL_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0
|
|
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[LOGICAL_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: [[MUL:%.*]] = mul i64 1, [[TMP2]]
|
|
|
|
// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[MUL]] to i32
|
[OpenMP] Overhaul `declare target` handling
This patch fixes various issues with our prior `declare target` handling
and extends it to support `omp begin declare target` as well.
This started with PR49649 in mind, trying to provide a way for users to
avoid the "ref" global use introduced for globals with internal linkage.
From there it went down the rabbit hole, e.g., all variables, even
`nohost` ones, were emitted into the device code so it was impossible to
determine if "ref" was needed late in the game (based on the name only).
To make it really useful, `begin declare target` was needed as it can
carry the `device_type`. Not emitting variables eagerly had a ripple
effect. Finally, the precedence of the (explicit) declare target list
items needed to be taken into account, that meant we cannot just look
for any declare target attribute to make a decision. This caused the
handling of functions to require fixup as well.
I tried to clean up things while I was at it, e.g., we should not "parse
declarations and defintions" as part of OpenMP parsing, this will always
break at some point. Instead, we keep track what region we are in and
act on definitions and declarations instead, this is what we do for
declare variant and other begin/end directives already.
Highlights:
- new diagnosis for restrictions specificed in the standard,
- delayed emission of globals not mentioned in an explicit
list of a declare target,
- omission of `nohost` globals on the host and `host` globals on the
device,
- no explicit parsing of declarations in-between `omp [begin] declare
variant` and the corresponding end anymore, regular parsing instead,
- precedence for explicit mentions in `declare target` lists over
implicit mentions in the declaration-definition-seq, and
- `omp allocate` declarations will now replace an earlier emitted
global, if necessary.
---
Notes:
The patch is larger than I hoped but it turns out that most changes do
on their own lead to "inconsistent states", which seem less desirable
overall.
After working through this I feel the standard should remove the
explicit declare target forms as the delayed emission is horrible.
That said, while we delay things anyway, it seems to me we check too
often for the current status even though that is often not sufficient to
act upon. There seems to be a lot of duplication that can probably be
trimmed down. Eagerly emitting some things seems pretty weak as an
argument to keep so much logic around.
---
Reviewed By: ABataev
Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
|
|
|
// CHECK-NEXT: call void @_ZNK10MyIteratorplEj(%struct.MyIterator* sret([[STRUCT_MYITERATOR]]) align 1 [[REF_TMP]], %struct.MyIterator* nonnull dereferenceable(1) [[TMP1]], i32 [[CONV]])
|
2021-04-10 15:41:16 +08:00
|
|
|
// CHECK-NEXT: [[CALL:%.*]] = call i32 @_ZNK10MyIteratordeEv(%struct.MyIterator* nonnull dereferenceable(1) [[REF_TMP]])
|
2021-03-04 07:15:32 +08:00
|
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[LOOPVAR_ADDR]], align 8
|
|
|
|
// CHECK-NEXT: store i32 [[CALL]], i32* [[TMP3]], align 4
|
|
|
|
// CHECK-NEXT: ret void
|
|
|
|
//
|