llvm-project/clang/test/OpenMP/single_codegen.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

5241 lines
367 KiB
C++
Raw Normal View History

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK4
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -std=c++11 -fopenmp -fnoopenmp-use-tls -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK5
// RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6
// RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -std=c++11 -fopenmp-simd -fnoopenmp-use-tls -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// expected-no-diagnostics
#ifndef ARRAY
#ifndef HEADER
#define HEADER
class TestClass {
public:
int a;
TestClass() : a(0) {}
TestClass(const TestClass &C) : a(C.a) {}
TestClass &operator=(const TestClass &) { return *this;}
~TestClass(){};
};
TestClass tc;
TestClass tc2[2];
#pragma omp threadprivate(tc, tc2)
void foo() { extern void mayThrow(); mayThrow(); }
struct SS {
int a;
int b : 4;
int &c;
SS(int &d) : a(0), b(0), c(d) {
#pragma omp parallel firstprivate(a, b, c)
#pragma omp single copyprivate(a, this->b, (this)->c)
[&]() {
++this->a, --b, (this)->c /= 1;
#pragma omp parallel firstprivate(a, b, c)
#pragma omp single copyprivate(a, this->b, (this)->c)
++(this)->a, --b, this->c /= 1;
}();
}
};
template<typename T>
struct SST {
T a;
SST() : a(T()) {
#pragma omp parallel firstprivate(a)
#pragma omp single copyprivate(this->a)
[&]() {
[&]() {
++this->a;
#pragma omp parallel firstprivate(a)
#pragma omp single copyprivate((this)->a)
++(this)->a;
}();
}();
}
};
int main() {
char a;
char a2[2];
TestClass &c = tc;
SST<double> sst;
SS ss(c.a);
#pragma omp single nowait
a = 2;
#pragma omp single
a = 2;
#pragma omp single copyprivate(a, c, tc, a2, tc2)
foo();
return a;
}
void parallel_single() {
#pragma omp parallel
#pragma omp single
foo();
}
#endif
#else
struct St {
int a, b;
St() : a(0), b(0) {}
St &operator=(const St &) { return *this; };
~St() {}
};
void array_func(int n, int a[n], St s[2]) {
#pragma omp single copyprivate(a, s)
;
}
#endif
// Private a
// Private b
// Private c
// Private a
// Private b
// Private c
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
// CHECK1-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]])
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: ret i8* [[TMP3]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN9TestClassC1Ev
// CHECK1-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK1-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK1-NEXT: call void @_ZN9TestClassC2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]])
// CHECK1-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_.
// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
// CHECK1-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR3:[0-9]+]]
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN9TestClassD1Ev
// CHECK1-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK1-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK1-NEXT: call void @_ZN9TestClassD2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]]
// CHECK1-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_.
// CHECK1-SAME: () #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.)
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..1
// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK1-NEXT: entry:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0
// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2
// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
// CHECK1: arrayctor.loop:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ]
// CHECK1-NEXT: invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
// CHECK1-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]]
// CHECK1: invoke.cont:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1
// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
// CHECK1: arrayctor.cont:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: ret i8* [[TMP3]]
// CHECK1: lpad:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 }
// CHECK1-NEXT: cleanup
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0
// CHECK1-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1
// CHECK1-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]]
// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
// CHECK1: arraydestroy.body:
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK1-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]]
// CHECK1: arraydestroy.done1:
// CHECK1-NEXT: br label [[EH_RESUME:%.*]]
// CHECK1: eh.resume:
// CHECK1-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK1-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
// CHECK1-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
// CHECK1-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
// CHECK1-NEXT: resume { i8*, i32 } [[LPAD_VAL2]]
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..2
// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2
// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK1: arraydestroy.body:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK1-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
// CHECK1: arraydestroy.done1:
// CHECK1-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..3
// CHECK1-SAME: () #[[ATTR0]] {
// CHECK1-NEXT: entry:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2)
// CHECK1-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init
// CHECK1-SAME: () #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) @tc)
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3]]
// CHECK1-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.4
// CHECK1-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
// CHECK1: arrayctor.loop:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ]
// CHECK1-NEXT: invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
// CHECK1-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]]
// CHECK1: invoke.cont:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1
// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2)
// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
// CHECK1: arrayctor.cont:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]]
// CHECK1-NEXT: ret void
// CHECK1: lpad:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 }
// CHECK1-NEXT: cleanup
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0
// CHECK1-NEXT: store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1
// CHECK1-NEXT: store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ARRAYCTOR_CUR]]
// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
// CHECK1: arraydestroy.body:
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK1-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0)
// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]]
// CHECK1: arraydestroy.done1:
// CHECK1-NEXT: br label [[EH_RESUME:%.*]]
// CHECK1: eh.resume:
// CHECK1-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK1-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
// CHECK1-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
// CHECK1-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
// CHECK1-NEXT: resume { i8*, i32 } [[LPAD_VAL2]]
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK1: arraydestroy.body:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK1-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0)
// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
// CHECK1: arraydestroy.done1:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN9TestClassC2Ev
// CHECK1-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK1-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: store i32 0, i32* [[A]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN9TestClassD2Ev
// CHECK1-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK1-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z3foov
// CHECK1-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: call void @_Z8mayThrowv()
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@main
// CHECK1-SAME: () #[[ATTR6:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[A:%.*]] = alloca i8, align 1
// CHECK1-NEXT: [[A2:%.*]] = alloca [2 x i8], align 1
// CHECK1-NEXT: [[C:%.*]] = alloca %class.TestClass*, align 8
// CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 8
// CHECK1-NEXT: [[SS:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
// CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [5 x i8*], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK1-NEXT: store %class.TestClass* @tc, %class.TestClass** [[C]], align 8
// CHECK1-NEXT: call void @_ZN3SSTIdEC1Ev(%struct.SST* nonnull align 8 dereferenceable(8) [[SST]])
// CHECK1-NEXT: call void @_ZN2SSC1ERi(%struct.SS* nonnull align 8 dereferenceable(16) [[SS]], i32* nonnull align 4 dereferenceable(4) getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* @tc, i32 0, i32 0))
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: store i8 2, i8* [[A]], align 1
// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN1:%.*]], label [[OMP_IF_END2:%.*]]
// CHECK1: omp_if.then1:
// CHECK1-NEXT: store i8 2, i8* [[A]], align 1
// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END2]]
// CHECK1: omp_if.end2:
// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN3:%.*]], label [[OMP_IF_END4:%.*]]
// CHECK1: omp_if.then3:
// CHECK1-NEXT: invoke void @_Z3foov()
// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK1: invoke.cont:
// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: br label [[OMP_IF_END4]]
// CHECK1: lpad:
// CHECK1-NEXT: [[TMP7:%.*]] = landingpad { i8*, i32 }
// CHECK1-NEXT: catch i8* null
// CHECK1-NEXT: [[TMP8:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 0
// CHECK1-NEXT: store i8* [[TMP8]], i8** [[EXN_SLOT]], align 8
// CHECK1-NEXT: [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 1
// CHECK1-NEXT: store i32 [[TMP9]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK1: omp_if.end4:
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK1-NEXT: store i8* [[A]], i8** [[TMP10]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK1-NEXT: store i8* bitcast (%class.TestClass* @tc to i8*), i8** [[TMP11]], align 8
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%class.TestClass* @tc to i8*), i64 4, i8*** @tc.cache.)
// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %class.TestClass*
// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %class.TestClass* [[TMP14]] to i8*
// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP12]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 3
// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [2 x i8]* [[A2]] to i8*
// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 4
// CHECK1-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i64 8, i8*** @tc2.cache.)
// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to [2 x %class.TestClass]*
// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [2 x %class.TestClass]* [[TMP20]] to i8*
// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP18]], align 8
// CHECK1-NEXT: [[TMP22:%.*]] = bitcast [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i64 40, i8* [[TMP22]], void (i8*, i8*)* @.omp.copyprivate.copy_func, i32 [[TMP23]])
// CHECK1-NEXT: [[TMP24:%.*]] = load i8, i8* [[A]], align 1
// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP24]] to i32
// CHECK1-NEXT: ret i32 [[CONV]]
// CHECK1: terminate.handler:
// CHECK1-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13:[0-9]+]]
// CHECK1-NEXT: unreachable
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN3SSTIdEC1Ev
// CHECK1-SAME: (%struct.SST* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK1-NEXT: call void @_ZN3SSTIdEC2Ev(%struct.SST* nonnull align 8 dereferenceable(8) [[THIS1]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi
// CHECK1-SAME: (%struct.SS* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK1-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8
// CHECK1-NEXT: call void @_ZN2SSC2ERi(%struct.SS* nonnull align 8 dereferenceable(16) [[THIS1]], i32* nonnull align 4 dereferenceable(4) [[TMP0]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__clang_call_terminate
// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR8:[0-9]+]] comdat {
// CHECK1-NEXT: [[TMP2:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP0]]) #[[ATTR3]]
// CHECK1-NEXT: call void @_ZSt9terminatev() #[[ATTR13]]
// CHECK1-NEXT: unreachable
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func
// CHECK1-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x i8*]*
// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [5 x i8*]*
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP9]], align 1
// CHECK1-NEXT: store i8 [[TMP10]], i8* [[TMP7]], align 1
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK1-NEXT: [[TMP12:%.*]] = load i8*, i8** [[TMP11]], align 8
// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to %class.TestClass*
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[TMP14]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %class.TestClass*
// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP13]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP16]])
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 2
// CHECK1-NEXT: [[TMP18:%.*]] = load i8*, i8** [[TMP17]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %class.TestClass*
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 2
// CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %class.TestClass*
// CHECK1-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP19]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP22]])
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 3
// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 3
// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP24]], i8* align 1 [[TMP26]], i64 2, i1 false)
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 4
// CHECK1-NEXT: [[TMP28:%.*]] = load i8*, i8** [[TMP27]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i8* [[TMP28]] to %class.TestClass*
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 4
// CHECK1-NEXT: [[TMP31:%.*]] = load i8*, i8** [[TMP30]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i8* [[TMP31]] to %class.TestClass*
// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[TMP29]], i64 2
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[TMP29]], [[TMP33]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP32]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP29]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[CALL3:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %class.TestClass* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]])
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[CLASS_TESTCLASS]], %class.TestClass* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[CLASS_TESTCLASS]], %class.TestClass* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %class.TestClass* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done4:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN9TestClassaSERKS_
// CHECK1-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR10:[0-9]+]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK1-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK1-NEXT: store %class.TestClass* [[TMP0]], %class.TestClass** [[DOTADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK1-NEXT: ret %class.TestClass* [[THIS1]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN3SSTIdEC2Ev
// CHECK1-SAME: (%struct.SST* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK1-NEXT: [[A2:%.*]] = alloca double*, align 8
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: store double 0.000000e+00, double* [[A]], align 8
// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], %struct.SST* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: store double* [[A3]], double** [[A2]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[A2]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*
// CHECK1-NEXT: store double [[TMP1]], double* [[CONV]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]], i64 [[TMP2]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8
// CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
// CHECK1-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double*
// CHECK1-NEXT: store double* [[CONV]], double** [[TMP]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8
// CHECK1-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0
// CHECK1-NEXT: store %struct.SST* [[TMP0]], %struct.SST** [[TMP6]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1
// CHECK1-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK1-NEXT: store double* [[TMP8]], double** [[TMP7]], align 8
// CHECK1-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(%class.anon* nonnull align 8 dereferenceable(16) [[REF_TMP]])
// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK1: invoke.cont:
// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: lpad:
// CHECK1-NEXT: [[TMP9:%.*]] = landingpad { i8*, i32 }
// CHECK1-NEXT: catch i8* null
// CHECK1-NEXT: [[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP9]], 0
// CHECK1-NEXT: store i8* [[TMP10]], i8** [[EXN_SLOT]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP9]], 1
// CHECK1-NEXT: store i32 [[TMP11]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK1-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8*
// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.copyprivate.copy_func.5, i32 [[TMP16]])
// CHECK1-NEXT: ret void
// CHECK1: terminate.handler:
// CHECK1-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]]
// CHECK1-NEXT: unreachable
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZZN3SSTIdEC1EvENKUlvE_clEv
// CHECK1-SAME: (%class.anon* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon*, align 8
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK1-NEXT: store %class.anon* [[THIS]], %class.anon** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load %class.anon*, %class.anon** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON:%.*]], %class.anon* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0
// CHECK1-NEXT: store %struct.SST* [[TMP1]], %struct.SST** [[TMP2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 1
// CHECK1-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP4]], align 8
// CHECK1-NEXT: store double* [[TMP5]], double** [[TMP3]], align 8
// CHECK1-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(16) [[REF_TMP]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.5
// CHECK1-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to double*
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to double*
// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8
// CHECK1-NEXT: store double [[TMP12]], double* [[TMP8]], align 8
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv
// CHECK1-SAME: (%class.anon.0* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1
// CHECK1-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP2]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8
// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK1-NEXT: store double [[INC]], double* [[TMP3]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1
// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*
// CHECK1-NEXT: store double [[TMP7]], double* [[CONV]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SST* [[TMP1]], i64 [[TMP8]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR12]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8
// CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double*
// CHECK1-NEXT: store double* [[CONV]], double** [[TMP]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8
// CHECK1-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8
// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00
// CHECK1-NEXT: store double [[INC]], double* [[TMP6]], align 8
// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP9]] to i8*
// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.copyprivate.copy_func.7, i32 [[TMP12]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.7
// CHECK1-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to double*
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to double*
// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8
// CHECK1-NEXT: store double [[TMP12]], double* [[TMP8]], align 8
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC2ERi
// CHECK1-SAME: (%struct.SS* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[A2:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[C7:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK1-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: store i32 0, i32* [[A]], align 8
// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1
// CHECK1-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B]], align 4
// CHECK1-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16
// CHECK1-NEXT: store i8 [[BF_CLEAR]], i8* [[B]], align 4
// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8
// CHECK1-NEXT: store i32* [[TMP0]], i32** [[C]], align 8
// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: store i32* [[A3]], i32** [[A2]], align 8
// CHECK1-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1
// CHECK1-NEXT: [[BF_LOAD6:%.*]] = load i8, i8* [[B5]], align 4
// CHECK1-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD6]], 4
// CHECK1-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4
// CHECK1-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
// CHECK1-NEXT: store i32 [[BF_CAST]], i32* [[B4]], align 4
// CHECK1-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2
// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C8]], align 8
// CHECK1-NEXT: store i32* [[TMP1]], i32** [[C7]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4
// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV9]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK1-NEXT: [[CONV10:%.*]] = bitcast i64* [[C_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV10]], align 4
// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8
// CHECK1-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32*
// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8
// CHECK1-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8
// CHECK1-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8
// CHECK1-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0
// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP7]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1
// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK1-NEXT: store i32* [[TMP9]], i32** [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2
// CHECK1-NEXT: store i32* [[CONV1]], i32** [[TMP10]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3
// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK1-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8
// CHECK1-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.1* nonnull align 8 dereferenceable(32) [[REF_TMP]])
// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK1: invoke.cont:
// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: lpad:
// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 }
// CHECK1-NEXT: catch i8* null
// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0
// CHECK1-NEXT: store i8* [[TMP14]], i8** [[EXN_SLOT]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 1
// CHECK1-NEXT: store i32 [[TMP15]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK1-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to i8*
// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP16]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[CONV1]] to i8*
// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to i8*
// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP21]], align 8
// CHECK1-NEXT: [[TMP24:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP24]], void (i8*, i8*)* @.omp.copyprivate.copy_func.9, i32 [[TMP25]])
// CHECK1-NEXT: ret void
// CHECK1: terminate.handler:
// CHECK1-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]]
// CHECK1-NEXT: unreachable
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZZN2SSC1ERiENKUlvE_clEv
// CHECK1-SAME: (%class.anon.1* nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.1*, align 8
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store %class.anon.1* [[THIS]], %class.anon.1** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[THIS1:%.*]] = load %class.anon.1*, %class.anon.1** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], %class.anon.1* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.SS*, %struct.SS** [[TMP0]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1
// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 2
// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1
// CHECK1-NEXT: store i32 [[DEC]], i32* [[TMP6]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 3
// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1
// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4
// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 2
// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP17]], i32* [[CONV2]], align 4
// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 3
// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8
// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[C_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP21]], i32* [[CONV3]], align 4
// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[C_CASTED]], align 8
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.9
// CHECK1-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*
// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK1-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2
// CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2
// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8
// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*
// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK1-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR12]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32*
// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8
// CHECK1-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8
// CHECK1-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8
// CHECK1-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8
// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1
// CHECK1-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1
// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP10]], align 4
// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8*
// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[CONV1]] to i8*
// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to i8*
// CHECK1-NEXT: store i8* [[TMP19]], i8** [[TMP17]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP20]], void (i8*, i8*)* @.omp.copyprivate.copy_func.11, i32 [[TMP21]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.11
// CHECK1-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*
// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK1-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2
// CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2
// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8
// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*
// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK1-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z15parallel_singlev
// CHECK1-SAME: () #[[ATTR10]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*))
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: invoke void @_Z3foov()
// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK1: invoke.cont:
// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: lpad:
// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 }
// CHECK1-NEXT: catch i8* null
// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0
// CHECK1-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1
// CHECK1-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
// CHECK1-NEXT: ret void
// CHECK1: terminate.handler:
// CHECK1-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]]
// CHECK1-NEXT: unreachable
//
//
// CHECK1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_single_codegen.cpp
// CHECK1-SAME: () #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: call void @__cxx_global_var_init()
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: call void @__cxx_global_var_init.4()
// CHECK1-NEXT: call void @.__omp_threadprivate_init_.()
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK1-NEXT: call void @.__omp_threadprivate_init_..3()
// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init
// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) @tc)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZN9TestClassC1Ev
// CHECK2-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK2-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK2-NEXT: call void @_ZN9TestClassC2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZN9TestClassD1Ev
// CHECK2-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK2-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK2-NEXT: call void @_ZN9TestClassD2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
// CHECK2-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK2-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
// CHECK2: arrayctor.loop:
// CHECK2-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ]
// CHECK2-NEXT: invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
// CHECK2-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]]
// CHECK2: invoke.cont:
// CHECK2-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1
// CHECK2-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2)
// CHECK2-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
// CHECK2: arrayctor.cont:
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]]
// CHECK2-NEXT: ret void
// CHECK2: lpad:
// CHECK2-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 }
// CHECK2-NEXT: cleanup
// CHECK2-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0
// CHECK2-NEXT: store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1
// CHECK2-NEXT: store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK2-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ARRAYCTOR_CUR]]
// CHECK2-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
// CHECK2: arraydestroy.body:
// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK2-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0)
// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]]
// CHECK2: arraydestroy.done1:
// CHECK2-NEXT: br label [[EH_RESUME:%.*]]
// CHECK2: eh.resume:
// CHECK2-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK2-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
// CHECK2-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
// CHECK2-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
// CHECK2-NEXT: resume { i8*, i32 } [[LPAD_VAL2]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK2: arraydestroy.body:
// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK2-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0)
// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
// CHECK2: arraydestroy.done1:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
// CHECK2-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: ret i8* [[TMP3]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_.
// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
// CHECK2-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR3]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_.
// CHECK2-SAME: () #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.)
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..2
// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0
// CHECK2-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2
// CHECK2-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
// CHECK2: arrayctor.loop:
// CHECK2-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ]
// CHECK2-NEXT: invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
// CHECK2-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]]
// CHECK2: invoke.cont:
// CHECK2-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1
// CHECK2-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
// CHECK2-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
// CHECK2: arrayctor.cont:
// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: ret i8* [[TMP3]]
// CHECK2: lpad:
// CHECK2-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 }
// CHECK2-NEXT: cleanup
// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0
// CHECK2-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1
// CHECK2-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK2-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]]
// CHECK2-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
// CHECK2: arraydestroy.body:
// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK2-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]]
// CHECK2: arraydestroy.done1:
// CHECK2-NEXT: br label [[EH_RESUME:%.*]]
// CHECK2: eh.resume:
// CHECK2-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK2-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
// CHECK2-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
// CHECK2-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
// CHECK2-NEXT: resume { i8*, i32 } [[LPAD_VAL2]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..3
// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2
// CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK2: arraydestroy.body:
// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK2-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
// CHECK2: arraydestroy.done1:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..4
// CHECK2-SAME: () #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..2, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..3)
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z3foov
// CHECK2-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: call void @_Z8mayThrowv()
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@main
// CHECK2-SAME: () #[[ATTR6:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A:%.*]] = alloca i8, align 1
// CHECK2-NEXT: [[A2:%.*]] = alloca [2 x i8], align 1
// CHECK2-NEXT: [[C:%.*]] = alloca %class.TestClass*, align 8
// CHECK2-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 8
// CHECK2-NEXT: [[SS:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
// CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [5 x i8*], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK2-NEXT: store %class.TestClass* @tc, %class.TestClass** [[C]], align 8
// CHECK2-NEXT: call void @_ZN3SSTIdEC1Ev(%struct.SST* nonnull align 8 dereferenceable(8) [[SST]])
// CHECK2-NEXT: call void @_ZN2SSC1ERi(%struct.SS* nonnull align 8 dereferenceable(16) [[SS]], i32* nonnull align 4 dereferenceable(4) getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* @tc, i32 0, i32 0))
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK2: omp_if.then:
// CHECK2-NEXT: store i8 2, i8* [[A]], align 1
// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2: omp_if.end:
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
// CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN1:%.*]], label [[OMP_IF_END2:%.*]]
// CHECK2: omp_if.then1:
// CHECK2-NEXT: store i8 2, i8* [[A]], align 1
// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_IF_END2]]
// CHECK2: omp_if.end2:
// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN3:%.*]], label [[OMP_IF_END4:%.*]]
// CHECK2: omp_if.then3:
// CHECK2-NEXT: invoke void @_Z3foov()
// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK2: invoke.cont:
// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: br label [[OMP_IF_END4]]
// CHECK2: lpad:
// CHECK2-NEXT: [[TMP7:%.*]] = landingpad { i8*, i32 }
// CHECK2-NEXT: catch i8* null
// CHECK2-NEXT: [[TMP8:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 0
// CHECK2-NEXT: store i8* [[TMP8]], i8** [[EXN_SLOT]], align 8
// CHECK2-NEXT: [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 1
// CHECK2-NEXT: store i32 [[TMP9]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK2: omp_if.end4:
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK2-NEXT: store i8* [[A]], i8** [[TMP10]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK2-NEXT: store i8* bitcast (%class.TestClass* @tc to i8*), i8** [[TMP11]], align 8
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2
// CHECK2-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%class.TestClass* @tc to i8*), i64 4, i8*** @tc.cache.)
// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %class.TestClass*
// CHECK2-NEXT: [[TMP15:%.*]] = bitcast %class.TestClass* [[TMP14]] to i8*
// CHECK2-NEXT: store i8* [[TMP15]], i8** [[TMP12]], align 8
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 3
// CHECK2-NEXT: [[TMP17:%.*]] = bitcast [2 x i8]* [[A2]] to i8*
// CHECK2-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 4
// CHECK2-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i64 8, i8*** @tc2.cache.)
// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to [2 x %class.TestClass]*
// CHECK2-NEXT: [[TMP21:%.*]] = bitcast [2 x %class.TestClass]* [[TMP20]] to i8*
// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP18]], align 8
// CHECK2-NEXT: [[TMP22:%.*]] = bitcast [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i64 40, i8* [[TMP22]], void (i8*, i8*)* @.omp.copyprivate.copy_func, i32 [[TMP23]])
// CHECK2-NEXT: [[TMP24:%.*]] = load i8, i8* [[A]], align 1
// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP24]] to i32
// CHECK2-NEXT: ret i32 [[CONV]]
// CHECK2: terminate.handler:
// CHECK2-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13:[0-9]+]]
// CHECK2-NEXT: unreachable
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZN3SSTIdEC1Ev
// CHECK2-SAME: (%struct.SST* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK2-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK2-NEXT: call void @_ZN3SSTIdEC2Ev(%struct.SST* nonnull align 8 dereferenceable(8) [[THIS1]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZN2SSC1ERi
// CHECK2-SAME: (%struct.SS* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK2-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8
// CHECK2-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8
// CHECK2-NEXT: call void @_ZN2SSC2ERi(%struct.SS* nonnull align 8 dereferenceable(16) [[THIS1]], i32* nonnull align 4 dereferenceable(4) [[TMP0]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__clang_call_terminate
// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR8:[0-9]+]] comdat {
// CHECK2-NEXT: [[TMP2:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP0]]) #[[ATTR3]]
// CHECK2-NEXT: call void @_ZSt9terminatev() #[[ATTR13]]
// CHECK2-NEXT: unreachable
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func
// CHECK2-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x i8*]*
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [5 x i8*]*
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP9]], align 1
// CHECK2-NEXT: store i8 [[TMP10]], i8* [[TMP7]], align 1
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK2-NEXT: [[TMP12:%.*]] = load i8*, i8** [[TMP11]], align 8
// CHECK2-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to %class.TestClass*
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK2-NEXT: [[TMP15:%.*]] = load i8*, i8** [[TMP14]], align 8
// CHECK2-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %class.TestClass*
// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP13]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP16]])
// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 2
// CHECK2-NEXT: [[TMP18:%.*]] = load i8*, i8** [[TMP17]], align 8
// CHECK2-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %class.TestClass*
// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 2
// CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
// CHECK2-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %class.TestClass*
// CHECK2-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP19]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP22]])
// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 3
// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8
// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 3
// CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 8
// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP24]], i8* align 1 [[TMP26]], i64 2, i1 false)
// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 4
// CHECK2-NEXT: [[TMP28:%.*]] = load i8*, i8** [[TMP27]], align 8
// CHECK2-NEXT: [[TMP29:%.*]] = bitcast i8* [[TMP28]] to %class.TestClass*
// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 4
// CHECK2-NEXT: [[TMP31:%.*]] = load i8*, i8** [[TMP30]], align 8
// CHECK2-NEXT: [[TMP32:%.*]] = bitcast i8* [[TMP31]] to %class.TestClass*
// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[TMP29]], i64 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[TMP29]], [[TMP33]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK2: omp.arraycpy.body:
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP32]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP29]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK2-NEXT: [[CALL3:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %class.TestClass* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]])
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[CLASS_TESTCLASS]], %class.TestClass* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[CLASS_TESTCLASS]], %class.TestClass* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %class.TestClass* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]]
// CHECK2: omp.arraycpy.done4:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZN9TestClassaSERKS_
// CHECK2-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR10:[0-9]+]] comdat align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK2-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK2-NEXT: store %class.TestClass* [[TMP0]], %class.TestClass** [[DOTADDR]], align 8
// CHECK2-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK2-NEXT: ret %class.TestClass* [[THIS1]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z15parallel_singlev
// CHECK2-SAME: () #[[ATTR10]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK2: omp_if.then:
// CHECK2-NEXT: invoke void @_Z3foov()
// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK2: invoke.cont:
// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2: lpad:
// CHECK2-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 }
// CHECK2-NEXT: catch i8* null
// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0
// CHECK2-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1
// CHECK2-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK2: omp_if.end:
// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
// CHECK2-NEXT: ret void
// CHECK2: terminate.handler:
// CHECK2-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]]
// CHECK2-NEXT: unreachable
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZN9TestClassC2Ev
// CHECK2-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK2-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[THIS1]], i32 0, i32 0
// CHECK2-NEXT: store i32 0, i32* [[A]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZN9TestClassD2Ev
// CHECK2-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK2-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZN2SSC2ERi
// CHECK2-SAME: (%struct.SS* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[A2:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[B4:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[C7:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK2-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8
// CHECK2-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[THIS1]], i32 0, i32 0
// CHECK2-NEXT: store i32 0, i32* [[A]], align 8
// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1
// CHECK2-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B]], align 4
// CHECK2-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16
// CHECK2-NEXT: store i8 [[BF_CLEAR]], i8* [[B]], align 4
// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8
// CHECK2-NEXT: store i32* [[TMP0]], i32** [[C]], align 8
// CHECK2-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0
// CHECK2-NEXT: store i32* [[A3]], i32** [[A2]], align 8
// CHECK2-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1
// CHECK2-NEXT: [[BF_LOAD6:%.*]] = load i8, i8* [[B5]], align 4
// CHECK2-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD6]], 4
// CHECK2-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4
// CHECK2-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
// CHECK2-NEXT: store i32 [[BF_CAST]], i32* [[B4]], align 4
// CHECK2-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2
// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C8]], align 8
// CHECK2-NEXT: store i32* [[TMP1]], i32** [[C7]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4
// CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV9]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK2-NEXT: [[CONV10:%.*]] = bitcast i64* [[C_CASTED]] to i32*
// CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV10]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
// CHECK2-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK2-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32*
// CHECK2-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8
// CHECK2-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8
// CHECK2-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8
// CHECK2-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK2: omp_if.then:
// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0
// CHECK2-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP7]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1
// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK2-NEXT: store i32* [[TMP9]], i32** [[TMP8]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2
// CHECK2-NEXT: store i32* [[CONV1]], i32** [[TMP10]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 3
// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK2-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8
// CHECK2-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon* nonnull align 8 dereferenceable(32) [[REF_TMP]])
// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK2: invoke.cont:
// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2: lpad:
// CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 }
// CHECK2-NEXT: catch i8* null
// CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0
// CHECK2-NEXT: store i8* [[TMP14]], i8** [[EXN_SLOT]], align 8
// CHECK2-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 1
// CHECK2-NEXT: store i32 [[TMP15]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK2-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK2: omp_if.end:
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to i8*
// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP16]], align 8
// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32* [[CONV1]] to i8*
// CHECK2-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8
// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2
// CHECK2-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK2-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to i8*
// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP21]], align 8
// CHECK2-NEXT: [[TMP24:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP24]], void (i8*, i8*)* @.omp.copyprivate.copy_func.6, i32 [[TMP25]])
// CHECK2-NEXT: ret void
// CHECK2: terminate.handler:
// CHECK2-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]]
// CHECK2-NEXT: unreachable
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZZN2SSC1ERiENKUlvE_clEv
// CHECK2-SAME: (%class.anon* nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon*, align 8
// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: store %class.anon* [[THIS]], %class.anon** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[THIS1:%.*]] = load %class.anon*, %class.anon** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON:%.*]], %class.anon* [[THIS1]], i32 0, i32 0
// CHECK2-NEXT: [[TMP1:%.*]] = load %struct.SS*, %struct.SS** [[TMP0]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 1
// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 2
// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
// CHECK2-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1
// CHECK2-NEXT: store i32 [[DEC]], i32* [[TMP6]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 3
// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP8]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 1
// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK2-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4
// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 2
// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
// CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK2-NEXT: store i32 [[TMP17]], i32* [[CONV2]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 3
// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[C_CASTED]] to i32*
// CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV3]], align 4
// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[C_CASTED]], align 8
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.6
// CHECK2-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK2-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8
// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK2-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8
// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
// CHECK2-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2
// CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
// CHECK2-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*
// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2
// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8
// CHECK2-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK2-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR12]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK2-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32*
// CHECK2-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8
// CHECK2-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8
// CHECK2-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8
// CHECK2-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK2: omp_if.then:
// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8
// CHECK2-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1
// CHECK2-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1
// CHECK2-NEXT: store i32 [[DIV]], i32* [[TMP10]], align 4
// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2: omp_if.end:
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8*
// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8
// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP16:%.*]] = bitcast i32* [[CONV1]] to i8*
// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8
// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2
// CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to i8*
// CHECK2-NEXT: store i8* [[TMP19]], i8** [[TMP17]], align 8
// CHECK2-NEXT: [[TMP20:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP20]], void (i8*, i8*)* @.omp.copyprivate.copy_func.8, i32 [[TMP21]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.8
// CHECK2-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK2-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8
// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK2-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8
// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
// CHECK2-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2
// CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
// CHECK2-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*
// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2
// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8
// CHECK2-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK2-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZN3SSTIdEC2Ev
// CHECK2-SAME: (%struct.SST* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK2-NEXT: [[A2:%.*]] = alloca double*, align 8
// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0
// CHECK2-NEXT: store double 0.000000e+00, double* [[A]], align 8
// CHECK2-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], %struct.SST* [[THIS1]], i32 0, i32 0
// CHECK2-NEXT: store double* [[A3]], double** [[A2]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[A2]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*
// CHECK2-NEXT: store double [[TMP1]], double* [[CONV]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]], i64 [[TMP2]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[TMP:%.*]] = alloca double*, align 8
// CHECK2-NEXT: [[_TMP1:%.*]] = alloca double*, align 8
// CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK2-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double*
// CHECK2-NEXT: store double* [[CONV]], double** [[TMP]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8
// CHECK2-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK2: omp_if.then:
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0
// CHECK2-NEXT: store %struct.SST* [[TMP0]], %struct.SST** [[TMP6]], align 8
// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1
// CHECK2-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK2-NEXT: store double* [[TMP8]], double** [[TMP7]], align 8
// CHECK2-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(16) [[REF_TMP]])
// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK2: invoke.cont:
// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2: lpad:
// CHECK2-NEXT: [[TMP9:%.*]] = landingpad { i8*, i32 }
// CHECK2-NEXT: catch i8* null
// CHECK2-NEXT: [[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP9]], 0
// CHECK2-NEXT: store i8* [[TMP10]], i8** [[EXN_SLOT]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP9]], 1
// CHECK2-NEXT: store i32 [[TMP11]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK2-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK2: omp_if.end:
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK2-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8*
// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8
// CHECK2-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.copyprivate.copy_func.10, i32 [[TMP16]])
// CHECK2-NEXT: ret void
// CHECK2: terminate.handler:
// CHECK2-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]]
// CHECK2-NEXT: unreachable
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZZN3SSTIdEC1EvENKUlvE_clEv
// CHECK2-SAME: (%class.anon.0* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8
// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8
// CHECK2-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0
// CHECK2-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0
// CHECK2-NEXT: store %struct.SST* [[TMP1]], %struct.SST** [[TMP2]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1
// CHECK2-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP4]], align 8
// CHECK2-NEXT: store double* [[TMP5]], double** [[TMP3]], align 8
// CHECK2-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(%class.anon.1* nonnull align 8 dereferenceable(16) [[REF_TMP]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.10
// CHECK2-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to double*
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to double*
// CHECK2-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8
// CHECK2-NEXT: store double [[TMP12]], double* [[TMP8]], align 8
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv
// CHECK2-SAME: (%class.anon.1* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.1*, align 8
// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: store %class.anon.1* [[THIS]], %class.anon.1** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[THIS1:%.*]] = load %class.anon.1*, %class.anon.1** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], %class.anon.1* [[THIS1]], i32 0, i32 0
// CHECK2-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1
// CHECK2-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP2]], align 8
// CHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8
// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK2-NEXT: store double [[INC]], double* [[TMP3]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1
// CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8
// CHECK2-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*
// CHECK2-NEXT: store double [[TMP7]], double* [[CONV]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.SST* [[TMP1]], i64 [[TMP8]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR12]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[TMP:%.*]] = alloca double*, align 8
// CHECK2-NEXT: [[_TMP1:%.*]] = alloca double*, align 8
// CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double*
// CHECK2-NEXT: store double* [[CONV]], double** [[TMP]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8
// CHECK2-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK2: omp_if.then:
// CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK2-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8
// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00
// CHECK2-NEXT: store double [[INC]], double* [[TMP6]], align 8
// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2: omp_if.end:
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP9]] to i8*
// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.copyprivate.copy_func.12, i32 [[TMP12]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.12
// CHECK2-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to double*
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to double*
// CHECK2-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8
// CHECK2-NEXT: store double [[TMP12]], double* [[TMP8]], align 8
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_single_codegen.cpp
// CHECK2-SAME: () #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: call void @__cxx_global_var_init()
// CHECK2-NEXT: call void @__cxx_global_var_init.1()
// CHECK2-NEXT: call void @.__omp_threadprivate_init_.()
// CHECK2-NEXT: call void @.__omp_threadprivate_init_..4()
// CHECK2-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
// CHECK3-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]])
// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK3-NEXT: ret i8* [[TMP3]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZN9TestClassC1Ev
// CHECK3-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK3-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK3-NEXT: call void @_ZN9TestClassC2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]])
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_.
// CHECK3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
// CHECK3-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZN9TestClassD1Ev
// CHECK3-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK3-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK3-NEXT: call void @_ZN9TestClassD2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]]
// CHECK3-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_.
// CHECK3-SAME: () #[[ATTR0]] {
// CHECK3-NEXT: entry:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK3-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.)
// CHECK3-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..1
// CHECK3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*
// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2
// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
// CHECK3: arrayctor.loop:
// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ]
// CHECK3-NEXT: invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
// CHECK3-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]]
// CHECK3: invoke.cont:
// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1
// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
// CHECK3: arrayctor.cont:
// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK3-NEXT: ret i8* [[TMP3]]
// CHECK3: lpad:
// CHECK3-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 }
// CHECK3-NEXT: cleanup
// CHECK3-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0
// CHECK3-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8
// CHECK3-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1
// CHECK3-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK3-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]]
// CHECK3-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
// CHECK3: arraydestroy.body:
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK3-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]]
// CHECK3: arraydestroy.done1:
// CHECK3-NEXT: br label [[EH_RESUME:%.*]]
// CHECK3: eh.resume:
// CHECK3-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK3-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
// CHECK3-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
// CHECK3-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
// CHECK3-NEXT: resume { i8*, i32 } [[LPAD_VAL2]]
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..2
// CHECK3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2
// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK3: arraydestroy.body:
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK3-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
// CHECK3: arraydestroy.done1:
// CHECK3-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..3
// CHECK3-SAME: () #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2)
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init
// CHECK3-SAME: () #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) @tc)
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.4
// CHECK3-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
// CHECK3: arrayctor.loop:
// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ]
// CHECK3-NEXT: invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]]
// CHECK3: invoke.cont:
// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1
// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2)
// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
// CHECK3: arrayctor.cont:
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]]
// CHECK3-NEXT: ret void
// CHECK3: lpad:
// CHECK3-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 }
// CHECK3-NEXT: cleanup
// CHECK3-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0
// CHECK3-NEXT: store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8
// CHECK3-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1
// CHECK3-NEXT: store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK3-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ARRAYCTOR_CUR]]
// CHECK3-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
// CHECK3: arraydestroy.body:
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK3-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0)
// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]]
// CHECK3: arraydestroy.done1:
// CHECK3-NEXT: br label [[EH_RESUME:%.*]]
// CHECK3: eh.resume:
// CHECK3-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK3-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
// CHECK3-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
// CHECK3-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
// CHECK3-NEXT: resume { i8*, i32 } [[LPAD_VAL2]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
// CHECK3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK3: arraydestroy.body:
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK3-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0)
// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
// CHECK3: arraydestroy.done1:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZN9TestClassC2Ev
// CHECK3-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK3-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: store i32 0, i32* [[A]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZN9TestClassD2Ev
// CHECK3-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK3-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK3-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@_Z3foov
// CHECK3-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: call void @_Z8mayThrowv()
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@main
// CHECK3-SAME: () #[[ATTR6:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[A:%.*]] = alloca i8, align 1
// CHECK3-NEXT: [[A2:%.*]] = alloca [2 x i8], align 1
// CHECK3-NEXT: [[C:%.*]] = alloca %class.TestClass*, align 8
// CHECK3-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 8
// CHECK3-NEXT: [[SS:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
// CHECK3-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [5 x i8*], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK3-NEXT: store %class.TestClass* @tc, %class.TestClass** [[C]], align 8
// CHECK3-NEXT: call void @_ZN3SSTIdEC1Ev(%struct.SST* nonnull align 8 dereferenceable(8) [[SST]])
// CHECK3-NEXT: call void @_ZN2SSC1ERi(%struct.SS* nonnull align 8 dereferenceable(16) [[SS]], i32* nonnull align 4 dereferenceable(4) getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* @tc, i32 0, i32 0))
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.then:
// CHECK3-NEXT: store i8 2, i8* [[A]], align 1
// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN1:%.*]], label [[OMP_IF_END2:%.*]]
// CHECK3: omp_if.then1:
// CHECK3-NEXT: store i8 2, i8* [[A]], align 1
// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_IF_END2]]
// CHECK3: omp_if.end2:
// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN3:%.*]], label [[OMP_IF_END4:%.*]]
// CHECK3: omp_if.then3:
// CHECK3-NEXT: invoke void @_Z3foov()
// CHECK3-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK3: invoke.cont:
// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: br label [[OMP_IF_END4]]
// CHECK3: lpad:
// CHECK3-NEXT: [[TMP7:%.*]] = landingpad { i8*, i32 }
// CHECK3-NEXT: catch i8* null
// CHECK3-NEXT: [[TMP8:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 0
// CHECK3-NEXT: store i8* [[TMP8]], i8** [[EXN_SLOT]], align 8
// CHECK3-NEXT: [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 1
// CHECK3-NEXT: store i32 [[TMP9]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK3: omp_if.end4:
// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK3-NEXT: store i8* [[A]], i8** [[TMP10]], align 8
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK3-NEXT: store i8* bitcast (%class.TestClass* @tc to i8*), i8** [[TMP11]], align 8
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2
// CHECK3-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%class.TestClass* @tc to i8*), i64 4, i8*** @tc.cache.)
// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %class.TestClass*
// CHECK3-NEXT: [[TMP15:%.*]] = bitcast %class.TestClass* [[TMP14]] to i8*
// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP12]], align 8
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 3
// CHECK3-NEXT: [[TMP17:%.*]] = bitcast [2 x i8]* [[A2]] to i8*
// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8
// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 4
// CHECK3-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i64 8, i8*** @tc2.cache.)
// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to [2 x %class.TestClass]*
// CHECK3-NEXT: [[TMP21:%.*]] = bitcast [2 x %class.TestClass]* [[TMP20]] to i8*
// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP18]], align 8
// CHECK3-NEXT: [[TMP22:%.*]] = bitcast [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i64 40, i8* [[TMP22]], void (i8*, i8*)* @.omp.copyprivate.copy_func, i32 [[TMP23]])
// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[A]], align 1
// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP24]] to i32
// CHECK3-NEXT: ret i32 [[CONV]]
// CHECK3: terminate.handler:
// CHECK3-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK3-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13:[0-9]+]]
// CHECK3-NEXT: unreachable
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZN3SSTIdEC1Ev
// CHECK3-SAME: (%struct.SST* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK3-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK3-NEXT: call void @_ZN3SSTIdEC2Ev(%struct.SST* nonnull align 8 dereferenceable(8) [[THIS1]])
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZN2SSC1ERi
// CHECK3-SAME: (%struct.SS* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK3-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8
// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8
// CHECK3-NEXT: call void @_ZN2SSC2ERi(%struct.SS* nonnull align 8 dereferenceable(16) [[THIS1]], i32* nonnull align 4 dereferenceable(4) [[TMP0]])
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__clang_call_terminate
// CHECK3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR8:[0-9]+]] comdat {
// CHECK3-NEXT: [[TMP2:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP0]]) #[[ATTR3]]
// CHECK3-NEXT: call void @_ZSt9terminatev() #[[ATTR13]]
// CHECK3-NEXT: unreachable
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func
// CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK3-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x i8*]*
// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [5 x i8*]*
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8
// CHECK3-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP9]], align 1
// CHECK3-NEXT: store i8 [[TMP10]], i8* [[TMP7]], align 1
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[TMP11]], align 8
// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to %class.TestClass*
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK3-NEXT: [[TMP15:%.*]] = load i8*, i8** [[TMP14]], align 8
// CHECK3-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %class.TestClass*
// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP13]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP16]])
// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 2
// CHECK3-NEXT: [[TMP18:%.*]] = load i8*, i8** [[TMP17]], align 8
// CHECK3-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %class.TestClass*
// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 2
// CHECK3-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %class.TestClass*
// CHECK3-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP19]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP22]])
// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 3
// CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8
// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 3
// CHECK3-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 8
// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP24]], i8* align 1 [[TMP26]], i64 2, i1 false)
// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 4
// CHECK3-NEXT: [[TMP28:%.*]] = load i8*, i8** [[TMP27]], align 8
// CHECK3-NEXT: [[TMP29:%.*]] = bitcast i8* [[TMP28]] to %class.TestClass*
// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 4
// CHECK3-NEXT: [[TMP31:%.*]] = load i8*, i8** [[TMP30]], align 8
// CHECK3-NEXT: [[TMP32:%.*]] = bitcast i8* [[TMP31]] to %class.TestClass*
// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[TMP29]], i64 2
// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[TMP29]], [[TMP33]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK3: omp.arraycpy.body:
// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP32]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP29]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK3-NEXT: [[CALL3:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %class.TestClass* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]])
// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[CLASS_TESTCLASS]], %class.TestClass* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[CLASS_TESTCLASS]], %class.TestClass* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %class.TestClass* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]]
// CHECK3: omp.arraycpy.done4:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZN9TestClassaSERKS_
// CHECK3-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR10:[0-9]+]] comdat align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK3-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK3-NEXT: store %class.TestClass* [[TMP0]], %class.TestClass** [[DOTADDR]], align 8
// CHECK3-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK3-NEXT: ret %class.TestClass* [[THIS1]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZN3SSTIdEC2Ev
// CHECK3-SAME: (%struct.SST* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK3-NEXT: [[A2:%.*]] = alloca double*, align 8
// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK3-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: store double 0.000000e+00, double* [[A]], align 8
// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], %struct.SST* [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: store double* [[A3]], double** [[A2]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[A2]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*
// CHECK3-NEXT: store double [[TMP1]], double* [[CONV]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]], i64 [[TMP2]])
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 8
// CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 8
// CHECK3-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
// CHECK3-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double*
// CHECK3-NEXT: store double* [[CONV]], double** [[TMP]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8
// CHECK3-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK3-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
// CHECK3-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.then:
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0
// CHECK3-NEXT: store %struct.SST* [[TMP0]], %struct.SST** [[TMP6]], align 8
// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1
// CHECK3-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK3-NEXT: store double* [[TMP8]], double** [[TMP7]], align 8
// CHECK3-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(%class.anon* nonnull align 8 dereferenceable(16) [[REF_TMP]])
// CHECK3-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK3: invoke.cont:
// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: lpad:
// CHECK3-NEXT: [[TMP9:%.*]] = landingpad { i8*, i32 }
// CHECK3-NEXT: catch i8* null
// CHECK3-NEXT: [[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP9]], 0
// CHECK3-NEXT: store i8* [[TMP10]], i8** [[EXN_SLOT]], align 8
// CHECK3-NEXT: [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP9]], 1
// CHECK3-NEXT: store i32 [[TMP11]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK3-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK3-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK3-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8*
// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8
// CHECK3-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.copyprivate.copy_func.5, i32 [[TMP16]])
// CHECK3-NEXT: ret void
// CHECK3: terminate.handler:
// CHECK3-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK3-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]]
// CHECK3-NEXT: unreachable
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZZN3SSTIdEC1EvENKUlvE_clEv
// CHECK3-SAME: (%class.anon* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon*, align 8
// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK3-NEXT: store %class.anon* [[THIS]], %class.anon** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[THIS1:%.*]] = load %class.anon*, %class.anon** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON:%.*]], %class.anon* [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0
// CHECK3-NEXT: store %struct.SST* [[TMP1]], %struct.SST** [[TMP2]], align 8
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1
// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 1
// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP4]], align 8
// CHECK3-NEXT: store double* [[TMP5]], double** [[TMP3]], align 8
// CHECK3-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(16) [[REF_TMP]])
// CHECK3-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.5
// CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK3-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to double*
// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to double*
// CHECK3-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8
// CHECK3-NEXT: store double [[TMP12]], double* [[TMP8]], align 8
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv
// CHECK3-SAME: (%class.anon.0* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8
// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK3-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1
// CHECK3-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP2]], align 8
// CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8
// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK3-NEXT: store double [[INC]], double* [[TMP3]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1
// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8
// CHECK3-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*
// CHECK3-NEXT: store double [[TMP7]], double* [[CONV]], align 8
// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SST* [[TMP1]], i64 [[TMP8]])
// CHECK3-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6
// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR12]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 8
// CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 8
// CHECK3-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double*
// CHECK3-NEXT: store double* [[CONV]], double** [[TMP]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8
// CHECK3-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK3-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
// CHECK3-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.then:
// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK3-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8
// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00
// CHECK3-NEXT: store double [[INC]], double* [[TMP6]], align 8
// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK3-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP9]] to i8*
// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8
// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.copyprivate.copy_func.7, i32 [[TMP12]])
// CHECK3-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.7
// CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK3-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to double*
// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to double*
// CHECK3-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8
// CHECK3-NEXT: store double [[TMP12]], double* [[TMP8]], align 8
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZN2SSC2ERi
// CHECK3-SAME: (%struct.SS* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[A2:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[C7:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK3-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8
// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: store i32 0, i32* [[A]], align 8
// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1
// CHECK3-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B]], align 4
// CHECK3-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16
// CHECK3-NEXT: store i8 [[BF_CLEAR]], i8* [[B]], align 4
// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8
// CHECK3-NEXT: store i32* [[TMP0]], i32** [[C]], align 8
// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: store i32* [[A3]], i32** [[A2]], align 8
// CHECK3-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1
// CHECK3-NEXT: [[BF_LOAD6:%.*]] = load i8, i8* [[B5]], align 4
// CHECK3-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD6]], 4
// CHECK3-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4
// CHECK3-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
// CHECK3-NEXT: store i32 [[BF_CAST]], i32* [[B4]], align 4
// CHECK3-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2
// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C8]], align 8
// CHECK3-NEXT: store i32* [[TMP1]], i32** [[C7]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK3-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4
// CHECK3-NEXT: [[CONV9:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK3-NEXT: store i32 [[TMP5]], i32* [[CONV9]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 8
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK3-NEXT: [[CONV10:%.*]] = bitcast i64* [[C_CASTED]] to i32*
// CHECK3-NEXT: store i32 [[TMP8]], i32* [[CONV10]], align 4
// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8
// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]])
// CHECK3-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8
// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8
// CHECK3-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK3-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32*
// CHECK3-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8
// CHECK3-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8
// CHECK3-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8
// CHECK3-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.then:
// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0
// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP7]], align 8
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1
// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK3-NEXT: store i32* [[TMP9]], i32** [[TMP8]], align 8
// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2
// CHECK3-NEXT: store i32* [[CONV1]], i32** [[TMP10]], align 8
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3
// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK3-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8
// CHECK3-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.1* nonnull align 8 dereferenceable(32) [[REF_TMP]])
// CHECK3-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK3: invoke.cont:
// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: lpad:
// CHECK3-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 }
// CHECK3-NEXT: catch i8* null
// CHECK3-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0
// CHECK3-NEXT: store i8* [[TMP14]], i8** [[EXN_SLOT]], align 8
// CHECK3-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 1
// CHECK3-NEXT: store i32 [[TMP15]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK3-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to i8*
// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP16]], align 8
// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[CONV1]] to i8*
// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8
// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2
// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to i8*
// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP21]], align 8
// CHECK3-NEXT: [[TMP24:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP24]], void (i8*, i8*)* @.omp.copyprivate.copy_func.9, i32 [[TMP25]])
// CHECK3-NEXT: ret void
// CHECK3: terminate.handler:
// CHECK3-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK3-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]]
// CHECK3-NEXT: unreachable
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZZN2SSC1ERiENKUlvE_clEv
// CHECK3-SAME: (%class.anon.1* nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.1*, align 8
// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK3-NEXT: store %class.anon.1* [[THIS]], %class.anon.1** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[THIS1:%.*]] = load %class.anon.1*, %class.anon.1** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], %class.anon.1* [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: [[TMP1:%.*]] = load %struct.SS*, %struct.SS** [[TMP0]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1
// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 2
// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1
// CHECK3-NEXT: store i32 [[DEC]], i32* [[TMP6]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 3
// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP8]], align 8
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1
// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK3-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4
// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 2
// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8
// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK3-NEXT: store i32 [[TMP17]], i32* [[CONV2]], align 4
// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 3
// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8
// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[C_CASTED]] to i32*
// CHECK3-NEXT: store i32 [[TMP21]], i32* [[CONV3]], align 4
// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[C_CASTED]], align 8
// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]])
// CHECK3-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.9
// CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK3-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*
// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8
// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK3-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8
// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*
// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4
// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2
// CHECK3-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*
// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2
// CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8
// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*
// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4
// CHECK3-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10
// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR12]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK3-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32*
// CHECK3-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8
// CHECK3-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8
// CHECK3-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8
// CHECK3-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.then:
// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8
// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1
// CHECK3-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8
// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1
// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP10]], align 4
// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8*
// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8
// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK3-NEXT: [[TMP16:%.*]] = bitcast i32* [[CONV1]] to i8*
// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8
// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2
// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK3-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to i8*
// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP17]], align 8
// CHECK3-NEXT: [[TMP20:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK3-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP20]], void (i8*, i8*)* @.omp.copyprivate.copy_func.11, i32 [[TMP21]])
// CHECK3-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.11
// CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK3-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*
// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8
// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK3-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8
// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*
// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4
// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2
// CHECK3-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*
// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2
// CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8
// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*
// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z15parallel_singlev
// CHECK3-SAME: () #[[ATTR10]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*))
// CHECK3-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12
// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK3-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.then:
// CHECK3-NEXT: invoke void @_Z3foov()
// CHECK3-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK3: invoke.cont:
// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: lpad:
// CHECK3-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 }
// CHECK3-NEXT: catch i8* null
// CHECK3-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0
// CHECK3-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8
// CHECK3-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1
// CHECK3-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
// CHECK3-NEXT: ret void
// CHECK3: terminate.handler:
// CHECK3-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK3-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]]
// CHECK3-NEXT: unreachable
//
//
// CHECK3-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_single_codegen.cpp
// CHECK3-SAME: () #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: call void @__cxx_global_var_init()
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK3-NEXT: call void @__cxx_global_var_init.4()
// CHECK3-NEXT: call void @.__omp_threadprivate_init_.()
// CHECK3-NEXT: call void @.__omp_threadprivate_init_..3()
// CHECK3-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@__cxx_global_var_init
// CHECK4-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK4-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.)
// CHECK4-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) @tc)
// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]]
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
// CHECK4-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
// CHECK4-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]])
// CHECK4-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK4-NEXT: ret i8* [[TMP3]]
//
//
// CHECK4-LABEL: define {{[^@]+}}@_ZN9TestClassC1Ev
// CHECK4-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK4-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK4-NEXT: call void @_ZN9TestClassC2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_.
// CHECK4-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
// CHECK4-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR3]]
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@_ZN9TestClassD1Ev
// CHECK4-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK4-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK4-NEXT: call void @_ZN9TestClassD2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]]
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
// CHECK4-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..2, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..3)
// CHECK4-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
// CHECK4: arrayctor.loop:
// CHECK4-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ]
// CHECK4-NEXT: invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
// CHECK4-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]]
// CHECK4: invoke.cont:
// CHECK4-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1
// CHECK4-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2)
// CHECK4-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
// CHECK4: arrayctor.cont:
// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]]
// CHECK4-NEXT: ret void
// CHECK4: lpad:
// CHECK4-NEXT: [[TMP2:%.*]] = landingpad { i8*, i32 }
// CHECK4-NEXT: cleanup
// CHECK4-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
// CHECK4-NEXT: store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8
// CHECK4-NEXT: [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1
// CHECK4-NEXT: store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK4-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ARRAYCTOR_CUR]]
// CHECK4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
// CHECK4: arraydestroy.body:
// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK4-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0)
// CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]]
// CHECK4: arraydestroy.done1:
// CHECK4-NEXT: br label [[EH_RESUME:%.*]]
// CHECK4: eh.resume:
// CHECK4-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK4-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
// CHECK4-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
// CHECK4-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
// CHECK4-NEXT: resume { i8*, i32 } [[LPAD_VAL2]]
//
//
// CHECK4-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..2
// CHECK4-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*
// CHECK4-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0
// CHECK4-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2
// CHECK4-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
// CHECK4: arrayctor.loop:
// CHECK4-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ]
// CHECK4-NEXT: invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
// CHECK4-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]]
// CHECK4: invoke.cont:
// CHECK4-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1
// CHECK4-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
// CHECK4-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
// CHECK4: arrayctor.cont:
// CHECK4-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK4-NEXT: ret i8* [[TMP3]]
// CHECK4: lpad:
// CHECK4-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 }
// CHECK4-NEXT: cleanup
// CHECK4-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0
// CHECK4-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8
// CHECK4-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1
// CHECK4-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK4-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]]
// CHECK4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
// CHECK4: arraydestroy.body:
// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK4-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
// CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]]
// CHECK4: arraydestroy.done1:
// CHECK4-NEXT: br label [[EH_RESUME:%.*]]
// CHECK4: eh.resume:
// CHECK4-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK4-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
// CHECK4-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
// CHECK4-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
// CHECK4-NEXT: resume { i8*, i32 } [[LPAD_VAL2]]
//
//
// CHECK4-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..3
// CHECK4-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2
// CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK4: arraydestroy.body:
// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK4-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
// CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
// CHECK4: arraydestroy.done1:
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
// CHECK4-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK4: arraydestroy.body:
// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK4-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0)
// CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
// CHECK4: arraydestroy.done1:
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@_Z3foov
// CHECK4-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: call void @_Z8mayThrowv()
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@main
// CHECK4-SAME: () #[[ATTR6:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[A:%.*]] = alloca i8, align 1
// CHECK4-NEXT: [[A2:%.*]] = alloca [2 x i8], align 1
// CHECK4-NEXT: [[C:%.*]] = alloca %class.TestClass*, align 8
// CHECK4-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 8
// CHECK4-NEXT: [[SS:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
// CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [5 x i8*], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK4-NEXT: store %class.TestClass* @tc, %class.TestClass** [[C]], align 8
// CHECK4-NEXT: call void @_ZN3SSTIdEC1Ev(%struct.SST* nonnull align 8 dereferenceable(8) [[SST]])
// CHECK4-NEXT: call void @_ZN2SSC1ERi(%struct.SS* nonnull align 8 dereferenceable(16) [[SS]], i32* nonnull align 4 dereferenceable(4) getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* @tc, i32 0, i32 0))
// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK4-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK4: omp_if.then:
// CHECK4-NEXT: store i8 2, i8* [[A]], align 1
// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4: omp_if.end:
// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN1:%.*]], label [[OMP_IF_END2:%.*]]
// CHECK4: omp_if.then1:
// CHECK4-NEXT: store i8 2, i8* [[A]], align 1
// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_IF_END2]]
// CHECK4: omp_if.end2:
// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK4-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN3:%.*]], label [[OMP_IF_END4:%.*]]
// CHECK4: omp_if.then3:
// CHECK4-NEXT: invoke void @_Z3foov()
// CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK4: invoke.cont:
// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: br label [[OMP_IF_END4]]
// CHECK4: lpad:
// CHECK4-NEXT: [[TMP7:%.*]] = landingpad { i8*, i32 }
// CHECK4-NEXT: catch i8* null
// CHECK4-NEXT: [[TMP8:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 0
// CHECK4-NEXT: store i8* [[TMP8]], i8** [[EXN_SLOT]], align 8
// CHECK4-NEXT: [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 1
// CHECK4-NEXT: store i32 [[TMP9]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK4: omp_if.end4:
// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK4-NEXT: store i8* [[A]], i8** [[TMP10]], align 8
// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK4-NEXT: store i8* bitcast (%class.TestClass* @tc to i8*), i8** [[TMP11]], align 8
// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2
// CHECK4-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%class.TestClass* @tc to i8*), i64 4, i8*** @tc.cache.)
// CHECK4-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %class.TestClass*
// CHECK4-NEXT: [[TMP15:%.*]] = bitcast %class.TestClass* [[TMP14]] to i8*
// CHECK4-NEXT: store i8* [[TMP15]], i8** [[TMP12]], align 8
// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 3
// CHECK4-NEXT: [[TMP17:%.*]] = bitcast [2 x i8]* [[A2]] to i8*
// CHECK4-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8
// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 4
// CHECK4-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i64 8, i8*** @tc2.cache.)
// CHECK4-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to [2 x %class.TestClass]*
// CHECK4-NEXT: [[TMP21:%.*]] = bitcast [2 x %class.TestClass]* [[TMP20]] to i8*
// CHECK4-NEXT: store i8* [[TMP21]], i8** [[TMP18]], align 8
// CHECK4-NEXT: [[TMP22:%.*]] = bitcast [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i64 40, i8* [[TMP22]], void (i8*, i8*)* @.omp.copyprivate.copy_func, i32 [[TMP23]])
// CHECK4-NEXT: [[TMP24:%.*]] = load i8, i8* [[A]], align 1
// CHECK4-NEXT: [[CONV:%.*]] = sext i8 [[TMP24]] to i32
// CHECK4-NEXT: ret i32 [[CONV]]
// CHECK4: terminate.handler:
// CHECK4-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK4-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13:[0-9]+]]
// CHECK4-NEXT: unreachable
//
//
// CHECK4-LABEL: define {{[^@]+}}@_ZN3SSTIdEC1Ev
// CHECK4-SAME: (%struct.SST* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK4-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK4-NEXT: call void @_ZN3SSTIdEC2Ev(%struct.SST* nonnull align 8 dereferenceable(8) [[THIS1]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@_ZN2SSC1ERi
// CHECK4-SAME: (%struct.SS* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK4-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8
// CHECK4-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8
// CHECK4-NEXT: call void @_ZN2SSC2ERi(%struct.SS* nonnull align 8 dereferenceable(16) [[THIS1]], i32* nonnull align 4 dereferenceable(4) [[TMP0]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@__clang_call_terminate
// CHECK4-SAME: (i8* [[TMP0:%.*]]) #[[ATTR8:[0-9]+]] comdat {
// CHECK4-NEXT: [[TMP2:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP0]]) #[[ATTR3]]
// CHECK4-NEXT: call void @_ZSt9terminatev() #[[ATTR13]]
// CHECK4-NEXT: unreachable
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func
// CHECK4-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK4-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x i8*]*
// CHECK4-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [5 x i8*]*
// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK4-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8
// CHECK4-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP9]], align 1
// CHECK4-NEXT: store i8 [[TMP10]], i8* [[TMP7]], align 1
// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK4-NEXT: [[TMP12:%.*]] = load i8*, i8** [[TMP11]], align 8
// CHECK4-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to %class.TestClass*
// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK4-NEXT: [[TMP15:%.*]] = load i8*, i8** [[TMP14]], align 8
// CHECK4-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %class.TestClass*
// CHECK4-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP13]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP16]])
// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 2
// CHECK4-NEXT: [[TMP18:%.*]] = load i8*, i8** [[TMP17]], align 8
// CHECK4-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %class.TestClass*
// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 2
// CHECK4-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
// CHECK4-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %class.TestClass*
// CHECK4-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP19]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP22]])
// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 3
// CHECK4-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8
// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 3
// CHECK4-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 8
// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP24]], i8* align 1 [[TMP26]], i64 2, i1 false)
// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 4
// CHECK4-NEXT: [[TMP28:%.*]] = load i8*, i8** [[TMP27]], align 8
// CHECK4-NEXT: [[TMP29:%.*]] = bitcast i8* [[TMP28]] to %class.TestClass*
// CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 4
// CHECK4-NEXT: [[TMP31:%.*]] = load i8*, i8** [[TMP30]], align 8
// CHECK4-NEXT: [[TMP32:%.*]] = bitcast i8* [[TMP31]] to %class.TestClass*
// CHECK4-NEXT: [[TMP33:%.*]] = getelementptr [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[TMP29]], i64 2
// CHECK4-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[TMP29]], [[TMP33]]
// CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK4: omp.arraycpy.body:
// CHECK4-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP32]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK4-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP29]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK4-NEXT: [[CALL3:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %class.TestClass* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]])
// CHECK4-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[CLASS_TESTCLASS]], %class.TestClass* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK4-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[CLASS_TESTCLASS]], %class.TestClass* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %class.TestClass* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]]
// CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]]
// CHECK4: omp.arraycpy.done4:
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@_ZN9TestClassaSERKS_
// CHECK4-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR10:[0-9]+]] comdat align 2 {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK4-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK4-NEXT: store %class.TestClass* [[TMP0]], %class.TestClass** [[DOTADDR]], align 8
// CHECK4-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK4-NEXT: ret %class.TestClass* [[THIS1]]
//
//
// CHECK4-LABEL: define {{[^@]+}}@_Z15parallel_singlev
// CHECK4-SAME: () #[[ATTR10]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK4: omp_if.then:
// CHECK4-NEXT: invoke void @_Z3foov()
// CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK4: invoke.cont:
// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4: lpad:
// CHECK4-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 }
// CHECK4-NEXT: catch i8* null
// CHECK4-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0
// CHECK4-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8
// CHECK4-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1
// CHECK4-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK4: omp_if.end:
// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
// CHECK4-NEXT: ret void
// CHECK4: terminate.handler:
// CHECK4-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK4-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]]
// CHECK4-NEXT: unreachable
//
//
// CHECK4-LABEL: define {{[^@]+}}@_ZN9TestClassC2Ev
// CHECK4-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK4-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[THIS1]], i32 0, i32 0
// CHECK4-NEXT: store i32 0, i32* [[A]], align 4
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@_ZN9TestClassD2Ev
// CHECK4-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK4-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@_ZN2SSC2ERi
// CHECK4-SAME: (%struct.SS* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[A2:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[C7:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK4-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8
// CHECK4-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[THIS1]], i32 0, i32 0
// CHECK4-NEXT: store i32 0, i32* [[A]], align 8
// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1
// CHECK4-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B]], align 4
// CHECK4-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16
// CHECK4-NEXT: store i8 [[BF_CLEAR]], i8* [[B]], align 4
// CHECK4-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2
// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8
// CHECK4-NEXT: store i32* [[TMP0]], i32** [[C]], align 8
// CHECK4-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0
// CHECK4-NEXT: store i32* [[A3]], i32** [[A2]], align 8
// CHECK4-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1
// CHECK4-NEXT: [[BF_LOAD6:%.*]] = load i8, i8* [[B5]], align 4
// CHECK4-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD6]], 4
// CHECK4-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4
// CHECK4-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
// CHECK4-NEXT: store i32 [[BF_CAST]], i32* [[B4]], align 4
// CHECK4-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2
// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C8]], align 8
// CHECK4-NEXT: store i32* [[TMP1]], i32** [[C7]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK4-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4
// CHECK4-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4
// CHECK4-NEXT: [[CONV9:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK4-NEXT: store i32 [[TMP5]], i32* [[CONV9]], align 4
// CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 8
// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK4-NEXT: [[CONV10:%.*]] = bitcast i64* [[C_CASTED]] to i32*
// CHECK4-NEXT: store i32 [[TMP8]], i32* [[CONV10]], align 4
// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8
// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..4
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
// CHECK4-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK4-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK4-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK4-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32*
// CHECK4-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8
// CHECK4-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8
// CHECK4-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8
// CHECK4-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK4-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK4: omp_if.then:
// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0
// CHECK4-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP7]], align 8
// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1
// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK4-NEXT: store i32* [[TMP9]], i32** [[TMP8]], align 8
// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2
// CHECK4-NEXT: store i32* [[CONV1]], i32** [[TMP10]], align 8
// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 3
// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK4-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8
// CHECK4-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon* nonnull align 8 dereferenceable(32) [[REF_TMP]])
// CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK4: invoke.cont:
// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4: lpad:
// CHECK4-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 }
// CHECK4-NEXT: catch i8* null
// CHECK4-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0
// CHECK4-NEXT: store i8* [[TMP14]], i8** [[EXN_SLOT]], align 8
// CHECK4-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 1
// CHECK4-NEXT: store i32 [[TMP15]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK4-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK4: omp_if.end:
// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to i8*
// CHECK4-NEXT: store i8* [[TMP18]], i8** [[TMP16]], align 8
// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK4-NEXT: [[TMP20:%.*]] = bitcast i32* [[CONV1]] to i8*
// CHECK4-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8
// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2
// CHECK4-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK4-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to i8*
// CHECK4-NEXT: store i8* [[TMP23]], i8** [[TMP21]], align 8
// CHECK4-NEXT: [[TMP24:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP24]], void (i8*, i8*)* @.omp.copyprivate.copy_func.5, i32 [[TMP25]])
// CHECK4-NEXT: ret void
// CHECK4: terminate.handler:
// CHECK4-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK4-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]]
// CHECK4-NEXT: unreachable
//
//
// CHECK4-LABEL: define {{[^@]+}}@_ZZN2SSC1ERiENKUlvE_clEv
// CHECK4-SAME: (%class.anon* nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon*, align 8
// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK4-NEXT: store %class.anon* [[THIS]], %class.anon** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[THIS1:%.*]] = load %class.anon*, %class.anon** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON:%.*]], %class.anon* [[THIS1]], i32 0, i32 0
// CHECK4-NEXT: [[TMP1:%.*]] = load %struct.SS*, %struct.SS** [[TMP0]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 1
// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 2
// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8
// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1
// CHECK4-NEXT: store i32 [[DEC]], i32* [[TMP6]], align 4
// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 3
// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP8]], align 8
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4
// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 1
// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8
// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK4-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4
// CHECK4-NEXT: [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 2
// CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8
// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
// CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK4-NEXT: store i32 [[TMP17]], i32* [[CONV2]], align 4
// CHECK4-NEXT: [[TMP18:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 3
// CHECK4-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8
// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
// CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[C_CASTED]] to i32*
// CHECK4-NEXT: store i32 [[TMP21]], i32* [[CONV3]], align 4
// CHECK4-NEXT: [[TMP22:%.*]] = load i64, i64* [[C_CASTED]], align 8
// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.5
// CHECK4-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK4-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*
// CHECK4-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*
// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK4-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK4-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4
// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8
// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*
// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK4-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8
// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*
// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
// CHECK4-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4
// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2
// CHECK4-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
// CHECK4-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*
// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2
// CHECK4-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8
// CHECK4-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*
// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK4-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..6
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR12]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK4-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK4-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK4-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32*
// CHECK4-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8
// CHECK4-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8
// CHECK4-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8
// CHECK4-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK4-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK4: omp_if.then:
// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4
// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8
// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1
// CHECK4-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8
// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1
// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP10]], align 4
// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4: omp_if.end:
// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8
// CHECK4-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8*
// CHECK4-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8
// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK4-NEXT: [[TMP16:%.*]] = bitcast i32* [[CONV1]] to i8*
// CHECK4-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8
// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2
// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP5]], align 8
// CHECK4-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to i8*
// CHECK4-NEXT: store i8* [[TMP19]], i8** [[TMP17]], align 8
// CHECK4-NEXT: [[TMP20:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP20]], void (i8*, i8*)* @.omp.copyprivate.copy_func.7, i32 [[TMP21]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.7
// CHECK4-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK4-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*
// CHECK4-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*
// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK4-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK4-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4
// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8
// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*
// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK4-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8
// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*
// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
// CHECK4-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4
// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2
// CHECK4-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
// CHECK4-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*
// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2
// CHECK4-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8
// CHECK4-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*
// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK4-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@_ZN3SSTIdEC2Ev
// CHECK4-SAME: (%struct.SST* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK4-NEXT: [[A2:%.*]] = alloca double*, align 8
// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK4-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0
// CHECK4-NEXT: store double 0.000000e+00, double* [[A]], align 8
// CHECK4-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], %struct.SST* [[THIS1]], i32 0, i32 0
// CHECK4-NEXT: store double* [[A3]], double** [[A2]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = load double*, double** [[A2]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8
// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*
// CHECK4-NEXT: store double [[TMP1]], double* [[CONV]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]], i64 [[TMP2]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..8
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[TMP:%.*]] = alloca double*, align 8
// CHECK4-NEXT: [[_TMP1:%.*]] = alloca double*, align 8
// CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK4-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK4-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double*
// CHECK4-NEXT: store double* [[CONV]], double** [[TMP]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8
// CHECK4-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK4-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
// CHECK4-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK4: omp_if.then:
// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0
// CHECK4-NEXT: store %struct.SST* [[TMP0]], %struct.SST** [[TMP6]], align 8
// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1
// CHECK4-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK4-NEXT: store double* [[TMP8]], double** [[TMP7]], align 8
// CHECK4-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(16) [[REF_TMP]])
// CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
// CHECK4: invoke.cont:
// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4: lpad:
// CHECK4-NEXT: [[TMP9:%.*]] = landingpad { i8*, i32 }
// CHECK4-NEXT: catch i8* null
// CHECK4-NEXT: [[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP9]], 0
// CHECK4-NEXT: store i8* [[TMP10]], i8** [[EXN_SLOT]], align 8
// CHECK4-NEXT: [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP9]], 1
// CHECK4-NEXT: store i32 [[TMP11]], i32* [[EHSELECTOR_SLOT]], align 4
// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK4-NEXT: br label [[TERMINATE_HANDLER:%.*]]
// CHECK4: omp_if.end:
// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK4-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK4-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8*
// CHECK4-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8
// CHECK4-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.copyprivate.copy_func.9, i32 [[TMP16]])
// CHECK4-NEXT: ret void
// CHECK4: terminate.handler:
// CHECK4-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
// CHECK4-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]]
// CHECK4-NEXT: unreachable
//
//
// CHECK4-LABEL: define {{[^@]+}}@_ZZN3SSTIdEC1EvENKUlvE_clEv
// CHECK4-SAME: (%class.anon.0* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8
// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8
// CHECK4-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0
// CHECK4-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0
// CHECK4-NEXT: store %struct.SST* [[TMP1]], %struct.SST** [[TMP2]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1
// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1
// CHECK4-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP4]], align 8
// CHECK4-NEXT: store double* [[TMP5]], double** [[TMP3]], align 8
// CHECK4-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(%class.anon.1* nonnull align 8 dereferenceable(16) [[REF_TMP]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.9
// CHECK4-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK4-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK4-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to double*
// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK4-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to double*
// CHECK4-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8
// CHECK4-NEXT: store double [[TMP12]], double* [[TMP8]], align 8
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv
// CHECK4-SAME: (%class.anon.1* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.1*, align 8
// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK4-NEXT: store %class.anon.1* [[THIS]], %class.anon.1** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[THIS1:%.*]] = load %class.anon.1*, %class.anon.1** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], %class.anon.1* [[THIS1]], i32 0, i32 0
// CHECK4-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1
// CHECK4-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP2]], align 8
// CHECK4-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8
// CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// CHECK4-NEXT: store double [[INC]], double* [[TMP3]], align 8
// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1
// CHECK4-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8
// CHECK4-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8
// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*
// CHECK4-NEXT: store double [[TMP7]], double* [[CONV]], align 8
// CHECK4-NEXT: [[TMP8:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SST* [[TMP1]], i64 [[TMP8]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..10
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR12]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[TMP:%.*]] = alloca double*, align 8
// CHECK4-NEXT: [[_TMP1:%.*]] = alloca double*, align 8
// CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK4-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double*
// CHECK4-NEXT: store double* [[CONV]], double** [[TMP]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8
// CHECK4-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK4-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
// CHECK4-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK4: omp_if.then:
// CHECK4-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK4-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8
// CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00
// CHECK4-NEXT: store double [[INC]], double* [[TMP6]], align 8
// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4: omp_if.end:
// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK4-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP1]], align 8
// CHECK4-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP9]] to i8*
// CHECK4-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8
// CHECK4-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.copyprivate.copy_func.11, i32 [[TMP12]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.11
// CHECK4-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK4-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK4-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to double*
// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK4-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to double*
// CHECK4-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8
// CHECK4-NEXT: store double [[TMP12]], double* [[TMP8]], align 8
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_single_codegen.cpp
// CHECK4-SAME: () #[[ATTR0]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: call void @__cxx_global_var_init()
// CHECK4-NEXT: call void @__cxx_global_var_init.1()
// CHECK4-NEXT: ret void
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG6:![0-9]+]] {
// CHECK5-NEXT: entry:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG9:![0-9]+]]
// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG9]]
// CHECK5-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]), !dbg [[DBG10:![0-9]+]]
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG9]]
// CHECK5-NEXT: ret i8* [[TMP3]], !dbg [[DBG9]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_ZN9TestClassC1Ev
// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 !dbg [[DBG11:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK5-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK5-NEXT: call void @_ZN9TestClassC2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]), !dbg [[DBG12:![0-9]+]]
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: ret void, !dbg [[DBG13:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_.
// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG14:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG15:![0-9]+]]
// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG15]]
// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR3:[0-9]+]], !dbg [[DBG15]]
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: ret void, !dbg [[DBG16:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_ZN9TestClassD1Ev
// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] align 2 !dbg [[DBG17:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK5-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK5-NEXT: call void @_ZN9TestClassD2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG18:![0-9]+]]
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: ret void, !dbg [[DBG19:![0-9]+]]
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_.
// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG20:![0-9]+]] {
// CHECK5-NEXT: entry:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG21:![0-9]+]]
// CHECK5-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.), !dbg [[DBG21]]
// CHECK5-NEXT: ret void, !dbg [[DBG21]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..1
// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG22:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG23:![0-9]+]]
// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*, !dbg [[DBG23]]
// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0, !dbg [[DBG24:![0-9]+]]
// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG24]]
// CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]], !dbg [[DBG24]]
// CHECK5: arrayctor.loop:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ], !dbg [[DBG24]]
// CHECK5-NEXT: invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]], !dbg [[DBG24]]
// CHECK5: invoke.cont:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1, !dbg [[DBG24]]
// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]], !dbg [[DBG24]]
// CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]], !dbg [[DBG24]]
// CHECK5: arrayctor.cont:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG23]]
// CHECK5-NEXT: ret i8* [[TMP3]], !dbg [[DBG23]]
// CHECK5: lpad:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 }
// CHECK5-NEXT: cleanup, !dbg [[DBG25:![0-9]+]]
// CHECK5-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG25]]
// CHECK5-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG25]]
// CHECK5-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1, !dbg [[DBG25]]
// CHECK5-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG25]]
// CHECK5-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]], !dbg [[DBG24]]
// CHECK5-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG24]]
// CHECK5: arraydestroy.body:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG24]]
// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG24]]
// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG24]]
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG24]]
// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG24]]
// CHECK5: arraydestroy.done1:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG24]]
// CHECK5: eh.resume:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG24]]
// CHECK5-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG24]]
// CHECK5-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG24]]
// CHECK5-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG24]]
// CHECK5-NEXT: resume { i8*, i32 } [[LPAD_VAL2]], !dbg [[DBG24]]
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..2
// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG26:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG27:![0-9]+]]
// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG27]]
// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG27]]
// CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG27]]
// CHECK5: arraydestroy.body:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG27]]
// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG27]]
// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG27]]
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG27]]
// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG27]]
// CHECK5: arraydestroy.done1:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: ret void, !dbg [[DBG28:![0-9]+]]
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..3
// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG29:![0-9]+]] {
// CHECK5-NEXT: entry:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG30:![0-9]+]]
// CHECK5-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB3]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2), !dbg [[DBG30]]
// CHECK5-NEXT: ret void, !dbg [[DBG30]]
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_var_init
// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG31:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) @tc), !dbg [[DBG33:![0-9]+]]
// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG35:![0-9]+]]
// CHECK5-NEXT: ret void, !dbg [[DBG33]]
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_var_init.4
// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG36:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]], !dbg [[DBG37:![0-9]+]]
// CHECK5: arrayctor.loop:
// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ], !dbg [[DBG37]]
// CHECK5-NEXT: invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
// CHECK5-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]], !dbg [[DBG37]]
// CHECK5: invoke.cont:
// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1, !dbg [[DBG37]]
// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), !dbg [[DBG37]]
// CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]], !dbg [[DBG37]]
// CHECK5: arrayctor.cont:
// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG39:![0-9]+]]
// CHECK5-NEXT: ret void, !dbg [[DBG39]]
// CHECK5: lpad:
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 }
// CHECK5-NEXT: cleanup, !dbg [[DBG40:![0-9]+]]
// CHECK5-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG40]]
// CHECK5-NEXT: store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG40]]
// CHECK5-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG40]]
// CHECK5-NEXT: store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG40]]
// CHECK5-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ARRAYCTOR_CUR]], !dbg [[DBG37]]
// CHECK5-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG37]]
// CHECK5: arraydestroy.body:
// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG37]]
// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG37]]
// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG37]]
// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), !dbg [[DBG37]]
// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG37]]
// CHECK5: arraydestroy.done1:
// CHECK5-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG37]]
// CHECK5: eh.resume:
// CHECK5-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG37]]
// CHECK5-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG37]]
// CHECK5-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG37]]
// CHECK5-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG37]]
// CHECK5-NEXT: resume { i8*, i32 } [[LPAD_VAL2]], !dbg [[DBG37]]
//
//
[OpenMP] Overhaul `declare target` handling This patch fixes various issues with our prior `declare target` handling and extends it to support `omp begin declare target` as well. This started with PR49649 in mind, trying to provide a way for users to avoid the "ref" global use introduced for globals with internal linkage. From there it went down the rabbit hole, e.g., all variables, even `nohost` ones, were emitted into the device code so it was impossible to determine if "ref" was needed late in the game (based on the name only). To make it really useful, `begin declare target` was needed as it can carry the `device_type`. Not emitting variables eagerly had a ripple effect. Finally, the precedence of the (explicit) declare target list items needed to be taken into account, that meant we cannot just look for any declare target attribute to make a decision. This caused the handling of functions to require fixup as well. I tried to clean up things while I was at it, e.g., we should not "parse declarations and defintions" as part of OpenMP parsing, this will always break at some point. Instead, we keep track what region we are in and act on definitions and declarations instead, this is what we do for declare variant and other begin/end directives already. Highlights: - new diagnosis for restrictions specificed in the standard, - delayed emission of globals not mentioned in an explicit list of a declare target, - omission of `nohost` globals on the host and `host` globals on the device, - no explicit parsing of declarations in-between `omp [begin] declare variant` and the corresponding end anymore, regular parsing instead, - precedence for explicit mentions in `declare target` lists over implicit mentions in the declaration-definition-seq, and - `omp allocate` declarations will now replace an earlier emitted global, if necessary. --- Notes: The patch is larger than I hoped but it turns out that most changes do on their own lead to "inconsistent states", which seem less desirable overall. After working through this I feel the standard should remove the explicit declare target forms as the delayed emission is horrible. That said, while we delay things anyway, it seems to me we check too often for the current status even though that is often not sufficient to act upon. There seems to be a lot of duplication that can probably be trimmed down. Eagerly emitting some things seems pretty weak as an argument to keep so much logic around. --- Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D101030
2021-04-22 13:57:28 +08:00
// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG41:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG42:![0-9]+]]
// CHECK5: arraydestroy.body:
// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG42]]
// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG42]]
// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG42]]
// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), !dbg [[DBG42]]
// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG42]]
// CHECK5: arraydestroy.done1:
// CHECK5-NEXT: ret void, !dbg [[DBG42]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_ZN9TestClassC2Ev
// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG43:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK5-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[THIS1]], i32 0, i32 0, !dbg [[DBG44:![0-9]+]]
// CHECK5-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG44]]
// CHECK5-NEXT: ret void, !dbg [[DBG45:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_ZN9TestClassD2Ev
// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG46:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK5-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK5-NEXT: ret void, !dbg [[DBG47:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_Z3foov
// CHECK5-SAME: () #[[ATTR4:[0-9]+]] !dbg [[DBG48:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: call void @_Z8mayThrowv(), !dbg [[DBG49:![0-9]+]]
// CHECK5-NEXT: ret void, !dbg [[DBG50:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@main
// CHECK5-SAME: () #[[ATTR6:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG51:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[A:%.*]] = alloca i8, align 1
// CHECK5-NEXT: [[A2:%.*]] = alloca [2 x i8], align 1
// CHECK5-NEXT: [[C:%.*]] = alloca %class.TestClass*, align 8
// CHECK5-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 8
// CHECK5-NEXT: [[SS:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
// CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [5 x i8*], align 8
// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]])
// CHECK5-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK5-NEXT: store %class.TestClass* @tc, %class.TestClass** [[C]], align 8, !dbg [[DBG52:![0-9]+]]
// CHECK5-NEXT: call void @_ZN3SSTIdEC1Ev(%struct.SST* nonnull align 8 dereferenceable(8) [[SST]]), !dbg [[DBG53:![0-9]+]]
// CHECK5-NEXT: call void @_ZN2SSC1ERi(%struct.SS* nonnull align 8 dereferenceable(16) [[SS]], i32* nonnull align 4 dereferenceable(4) getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* @tc, i32 0, i32 0)), !dbg [[DBG54:![0-9]+]]
// CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]]), !dbg [[DBG55:![0-9]+]]
// CHECK5-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0, !dbg [[DBG55]]
// CHECK5-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG55]]
// CHECK5: omp_if.then:
// CHECK5-NEXT: store i8 2, i8* [[A]], align 1, !dbg [[DBG56:![0-9]+]]
// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]]), !dbg [[DBG57:![0-9]+]]
// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG57]]
// CHECK5: omp_if.end:
// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 [[TMP0]]), !dbg [[DBG58:![0-9]+]]
// CHECK5-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0, !dbg [[DBG58]]
// CHECK5-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN1:%.*]], label [[OMP_IF_END2:%.*]], !dbg [[DBG58]]
// CHECK5: omp_if.then1:
// CHECK5-NEXT: store i8 2, i8* [[A]], align 1, !dbg [[DBG59:![0-9]+]]
// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB7]], i32 [[TMP0]]), !dbg [[DBG60:![0-9]+]]
// CHECK5-NEXT: br label [[OMP_IF_END2]], !dbg [[DBG60]]
// CHECK5: omp_if.end2:
// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB8:[0-9]+]], i32 [[TMP0]]), !dbg [[DBG61:![0-9]+]]
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG62:![0-9]+]]
// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB10:[0-9]+]], i32 [[TMP0]]), !dbg [[DBG62]]
// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0, !dbg [[DBG62]]
// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN3:%.*]], label [[OMP_IF_END4:%.*]], !dbg [[DBG62]]
// CHECK5: omp_if.then3:
// CHECK5-NEXT: invoke void @_Z3foov()
// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG63:![0-9]+]]
// CHECK5: invoke.cont:
// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB10]], i32 [[TMP0]]), !dbg [[DBG63]]
// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG63]]
// CHECK5-NEXT: br label [[OMP_IF_END4]], !dbg [[DBG63]]
// CHECK5: lpad:
// CHECK5-NEXT: [[TMP7:%.*]] = landingpad { i8*, i32 }
// CHECK5-NEXT: catch i8* null, !dbg [[DBG64:![0-9]+]]
// CHECK5-NEXT: [[TMP8:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 0, !dbg [[DBG64]]
// CHECK5-NEXT: store i8* [[TMP8]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG64]]
// CHECK5-NEXT: [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 1, !dbg [[DBG64]]
// CHECK5-NEXT: store i32 [[TMP9]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG64]]
// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB10]], i32 [[TMP0]]), !dbg [[DBG63]]
// CHECK5-NEXT: br label [[TERMINATE_HANDLER:%.*]], !dbg [[DBG63]]
// CHECK5: omp_if.end4:
// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG63]]
// CHECK5-NEXT: store i8* [[A]], i8** [[TMP10]], align 8, !dbg [[DBG63]]
// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG63]]
// CHECK5-NEXT: store i8* bitcast (%class.TestClass* @tc to i8*), i8** [[TMP11]], align 8, !dbg [[DBG63]]
// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG63]]
// CHECK5-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB12:[0-9]+]], i32 [[TMP0]], i8* bitcast (%class.TestClass* @tc to i8*), i64 4, i8*** @tc.cache.), !dbg [[DBG65:![0-9]+]]
// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %class.TestClass*, !dbg [[DBG65]]
// CHECK5-NEXT: [[TMP15:%.*]] = bitcast %class.TestClass* [[TMP14]] to i8*, !dbg [[DBG63]]
// CHECK5-NEXT: store i8* [[TMP15]], i8** [[TMP12]], align 8, !dbg [[DBG63]]
// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 3, !dbg [[DBG63]]
// CHECK5-NEXT: [[TMP17:%.*]] = bitcast [2 x i8]* [[A2]] to i8*, !dbg [[DBG63]]
// CHECK5-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8, !dbg [[DBG63]]
// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 4, !dbg [[DBG63]]
// CHECK5-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB14:[0-9]+]], i32 [[TMP0]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i64 8, i8*** @tc2.cache.), !dbg [[DBG66:![0-9]+]]
// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to [2 x %class.TestClass]*, !dbg [[DBG66]]
// CHECK5-NEXT: [[TMP21:%.*]] = bitcast [2 x %class.TestClass]* [[TMP20]] to i8*, !dbg [[DBG63]]
// CHECK5-NEXT: store i8* [[TMP21]], i8** [[TMP18]], align 8, !dbg [[DBG63]]
// CHECK5-NEXT: [[TMP22:%.*]] = bitcast [5 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG63]]
// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG63]]
// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB10]], i32 [[TMP0]], i64 40, i8* [[TMP22]], void (i8*, i8*)* @.omp.copyprivate.copy_func, i32 [[TMP23]]), !dbg [[DBG63]]
// CHECK5-NEXT: [[TMP24:%.*]] = load i8, i8* [[A]], align 1, !dbg [[DBG67:![0-9]+]]
// CHECK5-NEXT: [[CONV:%.*]] = sext i8 [[TMP24]] to i32, !dbg [[DBG67]]
// CHECK5-NEXT: ret i32 [[CONV]], !dbg [[DBG68:![0-9]+]]
// CHECK5: terminate.handler:
// CHECK5-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG63]]
// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13:[0-9]+]], !dbg [[DBG63]]
// CHECK5-NEXT: unreachable, !dbg [[DBG63]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_ZN3SSTIdEC1Ev
// CHECK5-SAME: (%struct.SST* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG69:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK5-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK5-NEXT: call void @_ZN3SSTIdEC2Ev(%struct.SST* nonnull align 8 dereferenceable(8) [[THIS1]]), !dbg [[DBG70:![0-9]+]]
// CHECK5-NEXT: ret void, !dbg [[DBG71:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_ZN2SSC1ERi
// CHECK5-SAME: (%struct.SS* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG72:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK5-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8
// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8, !dbg [[DBG73:![0-9]+]]
// CHECK5-NEXT: call void @_ZN2SSC2ERi(%struct.SS* nonnull align 8 dereferenceable(16) [[THIS1]], i32* nonnull align 4 dereferenceable(4) [[TMP0]]), !dbg [[DBG73]]
// CHECK5-NEXT: ret void, !dbg [[DBG74:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@__clang_call_terminate
// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR8:[0-9]+]] {
// CHECK5-NEXT: [[TMP2:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP0]]) #[[ATTR3]]
// CHECK5-NEXT: call void @_ZSt9terminatev() #[[ATTR13]]
// CHECK5-NEXT: unreachable
//
//
// CHECK5-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func
// CHECK5-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] !dbg [[DBG75:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK5-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG76:![0-9]+]]
// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x i8*]*, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [5 x i8*]*, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 0, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 0, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP9]], align 1, !dbg [[DBG77:![0-9]+]]
// CHECK5-NEXT: store i8 [[TMP10]], i8* [[TMP7]], align 1, !dbg [[DBG77]]
// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 1, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP12:%.*]] = load i8*, i8** [[TMP11]], align 8, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to %class.TestClass*, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 1, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP15:%.*]] = load i8*, i8** [[TMP14]], align 8, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %class.TestClass*, !dbg [[DBG76]]
// CHECK5-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP13]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP16]]), !dbg [[DBG78:![0-9]+]]
// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 2, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP18:%.*]] = load i8*, i8** [[TMP17]], align 8, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %class.TestClass*, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 2, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %class.TestClass*, !dbg [[DBG76]]
// CHECK5-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP19]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP22]]), !dbg [[DBG79:![0-9]+]]
// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 3, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 3, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 8, !dbg [[DBG76]]
// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP24]], i8* align 1 [[TMP26]], i64 2, i1 false), !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP3]], i64 0, i64 4, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP28:%.*]] = load i8*, i8** [[TMP27]], align 8, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP29:%.*]] = bitcast i8* [[TMP28]] to %class.TestClass*, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[TMP5]], i64 0, i64 4, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP31:%.*]] = load i8*, i8** [[TMP30]], align 8, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP32:%.*]] = bitcast i8* [[TMP31]] to %class.TestClass*, !dbg [[DBG76]]
// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[TMP29]], i64 2, !dbg [[DBG76]]
// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[TMP29]], [[TMP33]], !dbg [[DBG76]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]], !dbg [[DBG76]]
// CHECK5: omp.arraycpy.body:
// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP32]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ], !dbg [[DBG76]]
// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP29]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ], !dbg [[DBG76]]
// CHECK5-NEXT: [[CALL3:%.*]] = call nonnull align 4 dereferenceable(4) %class.TestClass* @_ZN9TestClassaSERKS_(%class.TestClass* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %class.TestClass* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]), !dbg [[DBG80:![0-9]+]]
// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[CLASS_TESTCLASS]], %class.TestClass* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1, !dbg [[DBG76]]
// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[CLASS_TESTCLASS]], %class.TestClass* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1, !dbg [[DBG76]]
// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %class.TestClass* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]], !dbg [[DBG76]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]], !dbg [[DBG76]]
// CHECK5: omp.arraycpy.done4:
// CHECK5-NEXT: ret void, !dbg [[DBG80]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_ZN9TestClassaSERKS_
// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %class.TestClass* nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR10:[0-9]+]] align 2 !dbg [[DBG81:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca %class.TestClass*, align 8
// CHECK5-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
// CHECK5-NEXT: store %class.TestClass* [[TMP0]], %class.TestClass** [[DOTADDR]], align 8
// CHECK5-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
// CHECK5-NEXT: ret %class.TestClass* [[THIS1]], !dbg [[DBG82:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_ZN3SSTIdEC2Ev
// CHECK5-SAME: (%struct.SST* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG83:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK5-NEXT: [[A2:%.*]] = alloca double*, align 8
// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK5-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0, !dbg [[DBG84:![0-9]+]]
// CHECK5-NEXT: store double 0.000000e+00, double* [[A]], align 8, !dbg [[DBG84]]
// CHECK5-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], %struct.SST* [[THIS1]], i32 0, i32 0, !dbg [[DBG85:![0-9]+]]
// CHECK5-NEXT: store double* [[A3]], double** [[A2]], align 8, !dbg [[DBG85]]
// CHECK5-NEXT: [[TMP0:%.*]] = load double*, double** [[A2]], align 8, !dbg [[DBG86:![0-9]+]]
// CHECK5-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !dbg [[DBG87:![0-9]+]]
// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*, !dbg [[DBG87]]
// CHECK5-NEXT: store double [[TMP1]], double* [[CONV]], align 8, !dbg [[DBG87]]
// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG87]]
// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB18:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]], i64 [[TMP2]]), !dbg [[DBG87]]
// CHECK5-NEXT: ret void, !dbg [[DBG88:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG89:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[TMP:%.*]] = alloca double*, align 8
// CHECK5-NEXT: [[_TMP1:%.*]] = alloca double*, align 8
// CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
// CHECK5-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8, !dbg [[DBG90:![0-9]+]]
// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double*, !dbg [[DBG90]]
// CHECK5-NEXT: store double* [[CONV]], double** [[TMP]], align 8, !dbg [[DBG90]]
// CHECK5-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8, !dbg [[DBG91:![0-9]+]]
// CHECK5-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8, !dbg [[DBG92:![0-9]+]]
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG92]]
// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG92]]
// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG92]]
// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB16:[0-9]+]], i32 [[TMP3]]), !dbg [[DBG92]]
// CHECK5-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0, !dbg [[DBG92]]
// CHECK5-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG92]]
// CHECK5: omp_if.then:
// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0, !dbg [[DBG93:![0-9]+]]
// CHECK5-NEXT: store %struct.SST* [[TMP0]], %struct.SST** [[TMP6]], align 8, !dbg [[DBG93]]
// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1, !dbg [[DBG93]]
// CHECK5-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP1]], align 8, !dbg [[DBG94:![0-9]+]]
// CHECK5-NEXT: store double* [[TMP8]], double** [[TMP7]], align 8, !dbg [[DBG93]]
// CHECK5-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(%class.anon* nonnull align 8 dereferenceable(16) [[REF_TMP]])
// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG93]]
// CHECK5: invoke.cont:
// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB16]], i32 [[TMP3]]), !dbg [[DBG93]]
// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG93]]
// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG93]]
// CHECK5: lpad:
// CHECK5-NEXT: [[TMP9:%.*]] = landingpad { i8*, i32 }
// CHECK5-NEXT: catch i8* null, !dbg [[DBG95:![0-9]+]]
// CHECK5-NEXT: [[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP9]], 0, !dbg [[DBG95]]
// CHECK5-NEXT: store i8* [[TMP10]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG95]]
// CHECK5-NEXT: [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP9]], 1, !dbg [[DBG95]]
// CHECK5-NEXT: store i32 [[TMP11]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG95]]
// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB16]], i32 [[TMP3]]), !dbg [[DBG93]]
// CHECK5-NEXT: br label [[TERMINATE_HANDLER:%.*]], !dbg [[DBG93]]
// CHECK5: omp_if.end:
// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG93]]
// CHECK5-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP1]], align 8, !dbg [[DBG96:![0-9]+]]
// CHECK5-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8*, !dbg [[DBG93]]
// CHECK5-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8, !dbg [[DBG93]]
// CHECK5-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG93]]
// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG93]]
// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB16]], i32 [[TMP3]], i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.copyprivate.copy_func.5, i32 [[TMP16]]), !dbg [[DBG93]]
// CHECK5-NEXT: ret void, !dbg [[DBG97:![0-9]+]]
// CHECK5: terminate.handler:
// CHECK5-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG93]]
// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]], !dbg [[DBG93]]
// CHECK5-NEXT: unreachable, !dbg [[DBG93]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_ZZN3SSTIdEC1EvENKUlvE_clEv
// CHECK5-SAME: (%class.anon* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 !dbg [[DBG98:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon*, align 8
// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK5-NEXT: store %class.anon* [[THIS]], %class.anon** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[THIS1:%.*]] = load %class.anon*, %class.anon** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON:%.*]], %class.anon* [[THIS1]], i32 0, i32 0
// CHECK5-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0, !dbg [[DBG99:![0-9]+]]
// CHECK5-NEXT: store %struct.SST* [[TMP1]], %struct.SST** [[TMP2]], align 8, !dbg [[DBG99]]
// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1, !dbg [[DBG99]]
// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 1, !dbg [[DBG100:![0-9]+]]
// CHECK5-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP4]], align 8, !dbg [[DBG100]]
// CHECK5-NEXT: store double* [[TMP5]], double** [[TMP3]], align 8, !dbg [[DBG99]]
// CHECK5-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(16) [[REF_TMP]]), !dbg [[DBG99]]
// CHECK5-NEXT: ret void, !dbg [[DBG101:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.5
// CHECK5-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG102:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK5-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG103:![0-9]+]]
// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*, !dbg [[DBG103]]
// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8, !dbg [[DBG103]]
// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*, !dbg [[DBG103]]
// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0, !dbg [[DBG103]]
// CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8, !dbg [[DBG103]]
// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to double*, !dbg [[DBG103]]
// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0, !dbg [[DBG103]]
// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8, !dbg [[DBG103]]
// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to double*, !dbg [[DBG103]]
// CHECK5-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8, !dbg [[DBG104:![0-9]+]]
// CHECK5-NEXT: store double [[TMP12]], double* [[TMP8]], align 8, !dbg [[DBG104]]
// CHECK5-NEXT: ret void, !dbg [[DBG104]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv
// CHECK5-SAME: (%class.anon.0* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 !dbg [[DBG107:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8
// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK5-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0
// CHECK5-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1, !dbg [[DBG108:![0-9]+]]
// CHECK5-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP2]], align 8, !dbg [[DBG108]]
// CHECK5-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8, !dbg [[DBG109:![0-9]+]]
// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00, !dbg [[DBG109]]
// CHECK5-NEXT: store double [[INC]], double* [[TMP3]], align 8, !dbg [[DBG109]]
// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1, !dbg [[DBG110:![0-9]+]]
// CHECK5-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8, !dbg [[DBG110]]
// CHECK5-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8, !dbg [[DBG111:![0-9]+]]
// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*, !dbg [[DBG111]]
// CHECK5-NEXT: store double [[TMP7]], double* [[CONV]], align 8, !dbg [[DBG111]]
// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG111]]
// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB22:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SST* [[TMP1]], i64 [[TMP8]]), !dbg [[DBG111]]
// CHECK5-NEXT: ret void, !dbg [[DBG112:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6
// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR12]] !dbg [[DBG113:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8
// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[TMP:%.*]] = alloca double*, align 8
// CHECK5-NEXT: [[_TMP1:%.*]] = alloca double*, align 8
// CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8
// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8, !dbg [[DBG114:![0-9]+]]
// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double*, !dbg [[DBG114]]
// CHECK5-NEXT: store double* [[CONV]], double** [[TMP]], align 8, !dbg [[DBG114]]
// CHECK5-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8, !dbg [[DBG115:![0-9]+]]
// CHECK5-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8, !dbg [[DBG116:![0-9]+]]
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG116]]
// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG116]]
// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG116]]
// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB20:[0-9]+]], i32 [[TMP3]]), !dbg [[DBG116]]
// CHECK5-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0, !dbg [[DBG116]]
// CHECK5-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG116]]
// CHECK5: omp_if.then:
// CHECK5-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8, !dbg [[DBG115]]
// CHECK5-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8, !dbg [[DBG117:![0-9]+]]
// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00, !dbg [[DBG117]]
// CHECK5-NEXT: store double [[INC]], double* [[TMP6]], align 8, !dbg [[DBG117]]
// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB20]], i32 [[TMP3]]), !dbg [[DBG117]]
// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG117]]
// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG117]]
// CHECK5: omp_if.end:
// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG117]]
// CHECK5-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP1]], align 8, !dbg [[DBG118:![0-9]+]]
// CHECK5-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP9]] to i8*, !dbg [[DBG117]]
// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8, !dbg [[DBG117]]
// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG117]]
// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG117]]
// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB20]], i32 [[TMP3]], i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.copyprivate.copy_func.7, i32 [[TMP12]]), !dbg [[DBG117]]
// CHECK5-NEXT: ret void, !dbg [[DBG119:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.7
// CHECK5-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG120:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK5-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG121:![0-9]+]]
// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*, !dbg [[DBG121]]
// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8, !dbg [[DBG121]]
// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*, !dbg [[DBG121]]
// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0, !dbg [[DBG121]]
// CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8, !dbg [[DBG121]]
// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to double*, !dbg [[DBG121]]
// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0, !dbg [[DBG121]]
// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8, !dbg [[DBG121]]
// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to double*, !dbg [[DBG121]]
// CHECK5-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8, !dbg [[DBG122:![0-9]+]]
// CHECK5-NEXT: store double [[TMP12]], double* [[TMP8]], align 8, !dbg [[DBG122]]
// CHECK5-NEXT: ret void, !dbg [[DBG122]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_ZN2SSC2ERi
// CHECK5-SAME: (%struct.SS* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG123:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[A2:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[B4:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[C7:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK5-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8
// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[THIS1]], i32 0, i32 0, !dbg [[DBG124:![0-9]+]]
// CHECK5-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG124]]
// CHECK5-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1, !dbg [[DBG125:![0-9]+]]
// CHECK5-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B]], align 4, !dbg [[DBG125]]
// CHECK5-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16, !dbg [[DBG125]]
// CHECK5-NEXT: store i8 [[BF_CLEAR]], i8* [[B]], align 4, !dbg [[DBG125]]
// CHECK5-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2, !dbg [[DBG126:![0-9]+]]
// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8, !dbg [[DBG127:![0-9]+]]
// CHECK5-NEXT: store i32* [[TMP0]], i32** [[C]], align 8, !dbg [[DBG126]]
// CHECK5-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0, !dbg [[DBG128:![0-9]+]]
// CHECK5-NEXT: store i32* [[A3]], i32** [[A2]], align 8, !dbg [[DBG128]]
// CHECK5-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1, !dbg [[DBG129:![0-9]+]]
// CHECK5-NEXT: [[BF_LOAD6:%.*]] = load i8, i8* [[B5]], align 4, !dbg [[DBG129]]
// CHECK5-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD6]], 4, !dbg [[DBG129]]
// CHECK5-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4, !dbg [[DBG129]]
// CHECK5-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32, !dbg [[DBG129]]
// CHECK5-NEXT: store i32 [[BF_CAST]], i32* [[B4]], align 4, !dbg [[DBG129]]
// CHECK5-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2, !dbg [[DBG130:![0-9]+]]
// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C8]], align 8, !dbg [[DBG130]]
// CHECK5-NEXT: store i32* [[TMP1]], i32** [[C7]], align 8, !dbg [[DBG130]]
// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8, !dbg [[DBG131:![0-9]+]]
// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG132:![0-9]+]]
// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*, !dbg [[DBG132]]
// CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4, !dbg [[DBG132]]
// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG132]]
// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4, !dbg [[DBG132]]
// CHECK5-NEXT: [[CONV9:%.*]] = bitcast i64* [[B_CASTED]] to i32*, !dbg [[DBG132]]
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[CONV9]], align 4, !dbg [[DBG132]]
// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8, !dbg [[DBG132]]
// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 8, !dbg [[DBG133:![0-9]+]]
// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !dbg [[DBG132]]
// CHECK5-NEXT: [[CONV10:%.*]] = bitcast i64* [[C_CASTED]] to i32*, !dbg [[DBG132]]
// CHECK5-NEXT: store i32 [[TMP8]], i32* [[CONV10]], align 4, !dbg [[DBG132]]
// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8, !dbg [[DBG132]]
// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB26:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]), !dbg [[DBG132]]
// CHECK5-NEXT: ret void, !dbg [[DBG134:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8
// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG135:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[TMP:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8
// CHECK5-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK5-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK5-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8, !dbg [[DBG136:![0-9]+]]
// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG136]]
// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*, !dbg [[DBG136]]
// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32*, !dbg [[DBG136]]
// CHECK5-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8, !dbg [[DBG136]]
// CHECK5-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8, !dbg [[DBG136]]
// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8, !dbg [[DBG137:![0-9]+]]
// CHECK5-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8, !dbg [[DBG138:![0-9]+]]
// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8, !dbg [[DBG139:![0-9]+]]
// CHECK5-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8, !dbg [[DBG138]]
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG138]]
// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG138]]
// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !dbg [[DBG138]]
// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB24:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG138]]
// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0, !dbg [[DBG138]]
// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG138]]
// CHECK5: omp_if.then:
// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0, !dbg [[DBG140:![0-9]+]]
// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP7]], align 8, !dbg [[DBG140]]
// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1, !dbg [[DBG140]]
// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8, !dbg [[DBG141:![0-9]+]]
// CHECK5-NEXT: store i32* [[TMP9]], i32** [[TMP8]], align 8, !dbg [[DBG140]]
// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2, !dbg [[DBG140]]
// CHECK5-NEXT: store i32* [[CONV1]], i32** [[TMP10]], align 8, !dbg [[DBG140]]
// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3, !dbg [[DBG140]]
// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP5]], align 8, !dbg [[DBG141]]
// CHECK5-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8, !dbg [[DBG140]]
// CHECK5-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.1* nonnull align 8 dereferenceable(32) [[REF_TMP]])
// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG140]]
// CHECK5: invoke.cont:
// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB24]], i32 [[TMP4]]), !dbg [[DBG140]]
// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG140]]
// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG140]]
// CHECK5: lpad:
// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 }
// CHECK5-NEXT: catch i8* null, !dbg [[DBG142:![0-9]+]]
// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0, !dbg [[DBG142]]
// CHECK5-NEXT: store i8* [[TMP14]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG142]]
// CHECK5-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 1, !dbg [[DBG142]]
// CHECK5-NEXT: store i32 [[TMP15]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG142]]
// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB24]], i32 [[TMP4]]), !dbg [[DBG140]]
// CHECK5-NEXT: br label [[TERMINATE_HANDLER:%.*]], !dbg [[DBG140]]
// CHECK5: omp_if.end:
// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG140]]
// CHECK5-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP4]], align 8, !dbg [[DBG143:![0-9]+]]
// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to i8*, !dbg [[DBG140]]
// CHECK5-NEXT: store i8* [[TMP18]], i8** [[TMP16]], align 8, !dbg [[DBG140]]
// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG140]]
// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i32* [[CONV1]] to i8*, !dbg [[DBG140]]
// CHECK5-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8, !dbg [[DBG140]]
// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG140]]
// CHECK5-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP5]], align 8, !dbg [[DBG144:![0-9]+]]
// CHECK5-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to i8*, !dbg [[DBG140]]
// CHECK5-NEXT: store i8* [[TMP23]], i8** [[TMP21]], align 8, !dbg [[DBG140]]
// CHECK5-NEXT: [[TMP24:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG140]]
// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG140]]
// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB24]], i32 [[TMP4]], i64 24, i8* [[TMP24]], void (i8*, i8*)* @.omp.copyprivate.copy_func.9, i32 [[TMP25]]), !dbg [[DBG140]]
// CHECK5-NEXT: ret void, !dbg [[DBG145:![0-9]+]]
// CHECK5: terminate.handler:
// CHECK5-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG140]]
// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]], !dbg [[DBG140]]
// CHECK5-NEXT: unreachable, !dbg [[DBG140]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_ZZN2SSC1ERiENKUlvE_clEv
// CHECK5-SAME: (%class.anon.1* nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 !dbg [[DBG146:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.1*, align 8
// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8
// CHECK5-NEXT: store %class.anon.1* [[THIS]], %class.anon.1** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[THIS1:%.*]] = load %class.anon.1*, %class.anon.1** [[THIS_ADDR]], align 8
// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], %class.anon.1* [[THIS1]], i32 0, i32 0
// CHECK5-NEXT: [[TMP1:%.*]] = load %struct.SS*, %struct.SS** [[TMP0]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1, !dbg [[DBG147:![0-9]+]]
// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8, !dbg [[DBG147]]
// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !dbg [[DBG148:![0-9]+]]
// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG148]]
// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4, !dbg [[DBG148]]
// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 2, !dbg [[DBG149:![0-9]+]]
// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8, !dbg [[DBG149]]
// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !dbg [[DBG150:![0-9]+]]
// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1, !dbg [[DBG150]]
// CHECK5-NEXT: store i32 [[DEC]], i32* [[TMP6]], align 4, !dbg [[DBG150]]
// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 3, !dbg [[DBG151:![0-9]+]]
// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP8]], align 8, !dbg [[DBG151]]
// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG152:![0-9]+]]
// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1, !dbg [[DBG152]]
// CHECK5-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4, !dbg [[DBG152]]
// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1, !dbg [[DBG153:![0-9]+]]
// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8, !dbg [[DBG153]]
// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4, !dbg [[DBG154:![0-9]+]]
// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*, !dbg [[DBG154]]
// CHECK5-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4, !dbg [[DBG154]]
// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG154]]
// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 2, !dbg [[DBG155:![0-9]+]]
// CHECK5-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8, !dbg [[DBG155]]
// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4, !dbg [[DBG154]]
// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32*, !dbg [[DBG154]]
// CHECK5-NEXT: store i32 [[TMP17]], i32* [[CONV2]], align 4, !dbg [[DBG154]]
// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[B_CASTED]], align 8, !dbg [[DBG154]]
// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 3, !dbg [[DBG156:![0-9]+]]
// CHECK5-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8, !dbg [[DBG156]]
// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4, !dbg [[DBG154]]
// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[C_CASTED]] to i32*, !dbg [[DBG154]]
// CHECK5-NEXT: store i32 [[TMP21]], i32* [[CONV3]], align 4, !dbg [[DBG154]]
// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[C_CASTED]], align 8, !dbg [[DBG154]]
// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]), !dbg [[DBG154]]
// CHECK5-NEXT: ret void, !dbg [[DBG157:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.9
// CHECK5-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG158:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK5-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG159:![0-9]+]]
// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG160:![0-9]+]]
// CHECK5-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4, !dbg [[DBG160]]
// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !dbg [[DBG161:![0-9]+]]
// CHECK5-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4, !dbg [[DBG161]]
// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*, !dbg [[DBG159]]
// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4, !dbg [[DBG162:![0-9]+]]
// CHECK5-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4, !dbg [[DBG162]]
// CHECK5-NEXT: ret void, !dbg [[DBG162]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10
// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR12]] !dbg [[DBG163:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8
// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[TMP:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8
// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK5-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK5-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8, !dbg [[DBG164:![0-9]+]]
// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG164]]
// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*, !dbg [[DBG164]]
// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32*, !dbg [[DBG164]]
// CHECK5-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8, !dbg [[DBG164]]
// CHECK5-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8, !dbg [[DBG164]]
// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8, !dbg [[DBG165:![0-9]+]]
// CHECK5-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8, !dbg [[DBG166:![0-9]+]]
// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8, !dbg [[DBG167:![0-9]+]]
// CHECK5-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8, !dbg [[DBG166]]
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG166]]
// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG166]]
// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !dbg [[DBG166]]
// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB28:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG166]]
// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0, !dbg [[DBG166]]
// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG166]]
// CHECK5: omp_if.then:
// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP4]], align 8, !dbg [[DBG165]]
// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !dbg [[DBG168:![0-9]+]]
// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1, !dbg [[DBG168]]
// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4, !dbg [[DBG168]]
// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8, !dbg [[DBG169:![0-9]+]]
// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1, !dbg [[DBG169]]
// CHECK5-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8, !dbg [[DBG169]]
// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8, !dbg [[DBG167]]
// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4, !dbg [[DBG170:![0-9]+]]
// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1, !dbg [[DBG170]]
// CHECK5-NEXT: store i32 [[DIV]], i32* [[TMP10]], align 4, !dbg [[DBG170]]
// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB28]], i32 [[TMP4]]), !dbg [[DBG168]]
// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG168]]
// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG168]]
// CHECK5: omp_if.end:
// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG168]]
// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8, !dbg [[DBG171:![0-9]+]]
// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8*, !dbg [[DBG168]]
// CHECK5-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8, !dbg [[DBG168]]
// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG168]]
// CHECK5-NEXT: [[TMP16:%.*]] = bitcast i32* [[CONV1]] to i8*, !dbg [[DBG168]]
// CHECK5-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !dbg [[DBG168]]
// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG168]]
// CHECK5-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP5]], align 8, !dbg [[DBG172:![0-9]+]]
// CHECK5-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to i8*, !dbg [[DBG168]]
// CHECK5-NEXT: store i8* [[TMP19]], i8** [[TMP17]], align 8, !dbg [[DBG168]]
// CHECK5-NEXT: [[TMP20:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG168]]
// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG168]]
// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB28]], i32 [[TMP4]], i64 24, i8* [[TMP20]], void (i8*, i8*)* @.omp.copyprivate.copy_func.11, i32 [[TMP21]]), !dbg [[DBG168]]
// CHECK5-NEXT: ret void, !dbg [[DBG173:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.11
// CHECK5-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG174:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK5-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG175:![0-9]+]]
// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG176:![0-9]+]]
// CHECK5-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4, !dbg [[DBG176]]
// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !dbg [[DBG177:![0-9]+]]
// CHECK5-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4, !dbg [[DBG177]]
// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*, !dbg [[DBG175]]
// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4, !dbg [[DBG178:![0-9]+]]
// CHECK5-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4, !dbg [[DBG178]]
// CHECK5-NEXT: ret void, !dbg [[DBG178]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_Z15parallel_singlev
// CHECK5-SAME: () #[[ATTR10]] !dbg [[DBG179:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB35:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)), !dbg [[DBG180:![0-9]+]]
// CHECK5-NEXT: ret void, !dbg [[DBG181:![0-9]+]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..12
// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG182:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG183:![0-9]+]]
// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4, !dbg [[DBG183]]
// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB32:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG183]]
// CHECK5-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0, !dbg [[DBG183]]
// CHECK5-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG183]]
// CHECK5: omp_if.then:
// CHECK5-NEXT: invoke void @_Z3foov()
// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG184:![0-9]+]]
// CHECK5: invoke.cont:
// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB32]], i32 [[TMP1]]), !dbg [[DBG184]]
// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG184]]
// CHECK5: lpad:
// CHECK5-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 }
// CHECK5-NEXT: catch i8* null, !dbg [[DBG185:![0-9]+]]
// CHECK5-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG185]]
// CHECK5-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG185]]
// CHECK5-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1, !dbg [[DBG185]]
// CHECK5-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG185]]
// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB32]], i32 [[TMP1]]), !dbg [[DBG184]]
// CHECK5-NEXT: br label [[TERMINATE_HANDLER:%.*]], !dbg [[DBG184]]
// CHECK5: omp_if.end:
// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB33:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG186:![0-9]+]]
// CHECK5-NEXT: ret void, !dbg [[DBG186]]
// CHECK5: terminate.handler:
// CHECK5-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG184]]
// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR13]], !dbg [[DBG184]]
// CHECK5-NEXT: unreachable, !dbg [[DBG184]]
//
//
// CHECK5-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_single_codegen.cpp
// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG187:![0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG188:![0-9]+]]
// CHECK5-NEXT: call void @__cxx_global_var_init.4(), !dbg [[DBG188]]
// CHECK5-NEXT: call void @.__omp_threadprivate_init_.(), !dbg [[DBG188]]
// CHECK5-NEXT: call void @.__omp_threadprivate_init_..3(), !dbg [[DBG188]]
// CHECK5-NEXT: ret void
//
//
// CHECK6-LABEL: define {{[^@]+}}@_Z10array_funciPiP2St
// CHECK6-SAME: (i32 [[N:%.*]], i32* [[A:%.*]], %struct.St* [[S:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK6-NEXT: entry:
// CHECK6-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8
// CHECK6-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8
// CHECK6-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [2 x i8*], align 8
// CHECK6-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK6-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8
// CHECK6-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8
// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK6-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK6-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK6-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
// CHECK6-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK6: omp_if.then:
// CHECK6-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK6-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK6-NEXT: br label [[OMP_IF_END]]
// CHECK6: omp_if.end:
// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0
// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i32** [[A_ADDR]] to i8*
// CHECK6-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8
// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1
// CHECK6-NEXT: [[TMP8:%.*]] = bitcast %struct.St** [[S_ADDR]] to i8*
// CHECK6-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8
// CHECK6-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
// CHECK6-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i64 16, i8* [[TMP9]], void (i8*, i8*)* @.omp.copyprivate.copy_func, i32 [[TMP10]])
// CHECK6-NEXT: ret void
//
//
// CHECK6-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func
// CHECK6-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK6-NEXT: entry:
// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK6-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK6-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK6-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x i8*]*
// CHECK6-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]*
// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32**
// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK6-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32**
// CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8
// CHECK6-NEXT: store i32* [[TMP12]], i32** [[TMP8]], align 8
// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP3]], i64 0, i64 1
// CHECK6-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8
// CHECK6-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to %struct.St**
// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1
// CHECK6-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8
// CHECK6-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to %struct.St**
// CHECK6-NEXT: [[TMP19:%.*]] = load %struct.St*, %struct.St** [[TMP18]], align 8
// CHECK6-NEXT: store %struct.St* [[TMP19]], %struct.St** [[TMP15]], align 8
// CHECK6-NEXT: ret void
//
//