From b6965f7246bba1b399755f56d8ae34893e815198 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Tue, 13 Sep 2022 21:47:13 +0000 Subject: [PATCH] [IR] Add alignment for llvm.threadlocal.address This diff sets the alignment attribute for the return value and the argument of llvm.threadlocal.address. (https://github.com/llvm/llvm-project/issues/57438) Test plan: ninja check-all Differential revision: https://reviews.llvm.org/D133741 --- .../cxx11-thread-local-instantiated.cpp | 2 +- .../cxx11-thread-local-reference.cpp | 4 +- clang/test/CodeGenCXX/cxx11-thread-local.cpp | 34 +++---- .../CodeGenCXX/cxx1y-variable-template.cpp | 2 +- .../cxx2a-thread-local-constinit.cpp | 4 +- .../microsoft-abi-thread-safe-statics.cpp | 2 +- clang/test/CodeGenCXX/pr18635.cpp | 2 +- clang/test/CodeGenCXX/threadlocal_address.cpp | 20 ++-- clang/test/Modules/initializers.cpp | 16 ++-- clang/test/OpenMP/parallel_copyin_codegen.cpp | 96 +++++++++---------- .../OpenMP/parallel_copyin_combined_codegen.c | 42 ++++---- clang/test/OpenMP/parallel_master_codegen.cpp | 8 +- ...distribute_parallel_for_copyin_codegen.cpp | 14 +-- clang/test/OpenMP/threadprivate_codegen.cpp | 72 +++++++------- llvm/lib/IR/IRBuilder.cpp | 17 +++- 15 files changed, 174 insertions(+), 161 deletions(-) diff --git a/clang/test/CodeGenCXX/cxx11-thread-local-instantiated.cpp b/clang/test/CodeGenCXX/cxx11-thread-local-instantiated.cpp index 46c16bdd94e3..fc3514a8b17d 100644 --- a/clang/test/CodeGenCXX/cxx11-thread-local-instantiated.cpp +++ b/clang/test/CodeGenCXX/cxx11-thread-local-instantiated.cpp @@ -17,7 +17,7 @@ S *current() { return TLS::mData; }; // CHECK-LABEL: define weak_odr hidden {{.*}} @_ZTWN3TLSI1SE5mDataE() {{.*}} comdat { // CHECK: call void @_ZTHN3TLSI1SE5mDataE() -// CHECK: [[TLSmData_ADDR:%[^ ]+]] = call ptr @llvm.threadlocal.address.p0(ptr @_ZN3TLSI1SE5mDataE) +// CHECK: [[TLSmData_ADDR:%[^ ]+]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @_ZN3TLSI1SE5mDataE) // CHECK: ret {{.*}} 
[[TLSmData_ADDR]] // Unlike for a global, the global initialization function must not be in a diff --git a/clang/test/CodeGenCXX/cxx11-thread-local-reference.cpp b/clang/test/CodeGenCXX/cxx11-thread-local-reference.cpp index 3fc960f1eefc..1d5143f1d351 100644 --- a/clang/test/CodeGenCXX/cxx11-thread-local-reference.cpp +++ b/clang/test/CodeGenCXX/cxx11-thread-local-reference.cpp @@ -15,7 +15,7 @@ int &g() { return r; } // CHECK: define {{.*}} @[[R_INIT:.*]]() // CHECK: call noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) i32* @_Z1fv() -// CHECK: %[[R_ADDR:.+]] = call i32** @llvm.threadlocal.address.p0p0i32(i32** @r) +// CHECK: %[[R_ADDR:.+]] = call align 8 i32** @llvm.threadlocal.address.p0p0i32(i32** align 8 @r) // CHECK: store i32* %{{.*}}, i32** %[[R_ADDR]], align 8 // CHECK-LABEL: define{{.*}} nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) i32* @_Z1gv() @@ -27,7 +27,7 @@ int &g() { return r; } // DARWIN: define cxx_fast_tlscc noundef i32* @_ZTW1r() [[ATTR1:#[0-9]+]] { // LINUX_AIX: call void @_ZTH1r() // DARWIN: call cxx_fast_tlscc void @_ZTH1r() -// CHECK: %[[R_ADDR2:.+]] = call i32** @llvm.threadlocal.address.p0p0i32(i32** @r) +// CHECK: %[[R_ADDR2:.+]] = call align 8 i32** @llvm.threadlocal.address.p0p0i32(i32** align 8 @r) // CHECK: load i32*, i32** %[[R_ADDR2]], align 8 // CHECK: ret i32* %{{.*}} diff --git a/clang/test/CodeGenCXX/cxx11-thread-local.cpp b/clang/test/CodeGenCXX/cxx11-thread-local.cpp index 4023daa8192a..7b53211e9ceb 100644 --- a/clang/test/CodeGenCXX/cxx11-thread-local.cpp +++ b/clang/test/CodeGenCXX/cxx11-thread-local.cpp @@ -108,7 +108,7 @@ void *e2 = V::m + W::m + &X::m; // CHECK: define {{.*}} @[[A_INIT:.*]]() // CHECK: call{{.*}} i32 @_Z1fv() -// CHECK: [[A_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @a) +// CHECK: [[A_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @a) // CHECK-NEXT: store i32 {{.*}}, i32* [[A_ADDR]], align 4 // CHECK-LABEL: define{{.*}} i32 @_Z1fv() @@ 
-118,13 +118,13 @@ int f() { // CHECK: br i1 %[[NEED_INIT]]{{.*}} // CHECK: %[[CALL:.*]] = call{{.*}} i32 @_Z1gv() - // CHECK: [[N_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ1fvE1n) + // CHECK: [[N_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZZ1fvE1n) // CHECK: store i32 %[[CALL]], i32* [[N_ADDR]], align 4 // CHECK: store i8 1, i8* @_ZGVZ1fvE1n // CHECK: br label static thread_local int n = g(); - - // CHECK: [[N_ADDR2:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ1fvE1n) + + // CHECK: [[N_ADDR2:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZZ1fvE1n) // CHECK: load i32, i32* [[N_ADDR2]], align 4 return n; } @@ -143,19 +143,19 @@ int f() { // LINUX: br label // AIX-NOT: br label // finally: -// LINUX_AIX: [[B_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @b) +// LINUX_AIX: [[B_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @b) // LINUX_AIX: ret i32* [[B_ADDR]] // DARWIN-LABEL: declare cxx_fast_tlscc noundef i32* @_ZTW1b() // There is no definition of the thread wrapper on Darwin for external TLV. 
// CHECK: define {{.*}} @[[D_INIT:.*]]() // CHECK: call{{.*}} i32 @_Z1gv() -// CHECK-NEXT: [[D_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZL1d) +// CHECK-NEXT: [[D_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZL1d) // CHECK-NEXT: store i32 %{{.*}}, i32* [[D_ADDR]], align 4 // CHECK: define {{.*}} @[[U_M_INIT:.*]]() // CHECK: call{{.*}} i32 @_Z1fv() -// CHECK-NEXT: [[UM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1U1mE) +// CHECK-NEXT: [[UM_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN1U1mE) // CHECK-NEXT: store i32 %{{.*}}, i32* [[UM_ADDR]], align 4 // CHECK: define {{.*}} @[[E_INIT:.*]]() @@ -170,20 +170,20 @@ int f() { // DARWIN-LABEL: define weak_odr hidden cxx_fast_tlscc noundef i32* @_ZTWN1VIiE1mE() // LINUX_AIX: call void @_ZTHN1VIiE1mE() // DARWIN: call cxx_fast_tlscc void @_ZTHN1VIiE1mE() -// CHECK: [[VM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1VIiE1mE) +// CHECK: [[VM_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN1VIiE1mE) // CHECK: ret i32* [[VM_ADDR]] // LINUX_AIX-LABEL: define weak_odr hidden noundef i32* @_ZTWN1WIiE1mE() // DARWIN-LABEL: define weak_odr hidden cxx_fast_tlscc noundef i32* @_ZTWN1WIiE1mE() // CHECK-NOT: call -// CHECK: [[WM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1WIiE1mE) +// CHECK: [[WM_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN1WIiE1mE) // CHECK: ret i32* [[WM_ADDR]] // LINUX_AIX-LABEL: define weak_odr hidden {{.*}}* @_ZTWN1XIiE1mE() // DARWIN-LABEL: define weak_odr hidden cxx_fast_tlscc {{.*}}* @_ZTWN1XIiE1mE() // LINUX_AIX: call void @_ZTHN1XIiE1mE() // DARWIN: call cxx_fast_tlscc void @_ZTHN1XIiE1mE() -// CHECK: [[XM_ADDR:%.+]] = call %struct.Dtor* @llvm.threadlocal.address.p0s_struct.Dtors(%struct.Dtor* @_ZN1XIiE1mE) +// CHECK: [[XM_ADDR:%.+]] = call align 1 %struct.Dtor* 
@llvm.threadlocal.address.p0s_struct.Dtors(%struct.Dtor* align 1 @_ZN1XIiE1mE) // CHECK: ret {{.*}}* [[XM_ADDR]] // LINUX_AIX: define internal void @[[VF_M_INIT]]() @@ -194,7 +194,7 @@ int f() { // CHECK: br i1 %[[VF_M_INITIALIZED]], // need init: // CHECK: call{{.*}} i32 @_Z1gv() -// CHECK: [[VFM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1VIfE1mE) +// CHECK: [[VFM_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN1VIfE1mE) // CHECK: store i32 %{{.*}}, i32* [[VFM_ADDR]], align 4 // CHECK: store i8 1, i8* bitcast (i64* @_ZGVN1VIfE1mE to i8*) // CHECK: br label @@ -222,7 +222,7 @@ int f() { // LINUX: br i1 icmp ne (void ()* @_ZTHN1VIcE1mE, // AIX-NOT: br i1 icmp ne (void ()* @_ZTHN1VIcE1mE // LINUX_AIX: call void @_ZTHN1VIcE1mE() -// LINUX_AIX: [[VEM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1VIcE1mE) +// LINUX_AIX: [[VEM_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN1VIcE1mE) // LINUX_AIX: ret i32* [[VEM_ADDR]] // DARWIN: declare cxx_fast_tlscc noundef i32* @_ZTWN1WIcE1mE() @@ -230,7 +230,7 @@ int f() { // LINUX: br i1 icmp ne (void ()* @_ZTHN1WIcE1mE, // AIX-NOT: br i1 icmp ne (void ()* @_ZTHN1WIcE1mE, // LINUX_AIX: call void @_ZTHN1WIcE1mE() -// LINUX_AIX: [[WEM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1WIcE1mE) +// LINUX_AIX: [[WEM_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN1WIcE1mE) // LINUX_AIX: ret i32* [[WEM_ADDR]] // DARWIN: declare cxx_fast_tlscc {{.*}}* @_ZTWN1XIcE1mE() @@ -238,7 +238,7 @@ int f() { // LINUX: br i1 icmp ne (void ()* @_ZTHN1XIcE1mE, // AIX-NOT: br i1 icmp ne (void ()* @_ZTHN1XIcE1mE, // LINUX_AIX: call void @_ZTHN1XIcE1mE() -// LINUX_AIX: [[XEM_ADDR:%.+]] = call %struct.Dtor* @llvm.threadlocal.address.p0s_struct.Dtors(%struct.Dtor* @_ZN1XIcE1mE) +// LINUX_AIX: [[XEM_ADDR:%.+]] = call align 1 %struct.Dtor* @llvm.threadlocal.address.p0s_struct.Dtors(%struct.Dtor* align 1 
@_ZN1XIcE1mE) // LINUX_AIX: ret {{.*}}* [[XEM_ADDR]] struct S { S(); ~S(); }; @@ -330,7 +330,7 @@ void set_anon_i() { // CHECK: br i1 %[[V_M_INITIALIZED]], // need init: // CHECK: call{{.*}} i32 @_Z1gv() -// CHECK: [[VEM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1VIiE1mE) +// CHECK: [[VEM_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN1VIiE1mE) // CHECK: store i32 %{{.*}}, i32* [[VEM_ADDR]], align 4 // CHECK: store i8 1, i8* bitcast (i64* @_ZGVN1VIiE1mE to i8*) // CHECK: br label @@ -373,7 +373,7 @@ void set_anon_i() { // DARWIN: define cxx_fast_tlscc noundef i32* @_ZTW1a() // LINUX_AIX: call void @_ZTH1a() // DARWIN: call cxx_fast_tlscc void @_ZTH1a() -// CHECK: [[A_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @a) +// CHECK: [[A_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @a) // CHECK: ret i32* [[A_ADDR]] // CHECK: } @@ -387,7 +387,7 @@ void set_anon_i() { // DARWIN-LABEL: define cxx_fast_tlscc noundef i32* @_ZTWN1U1mE() // LINUX_AIX: call void @_ZTHN1U1mE() // DARWIN: call cxx_fast_tlscc void @_ZTHN1U1mE() -// CHECK: [[UM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1U1mE) +// CHECK: [[UM_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN1U1mE) // CHECK: ret i32* [[UM_ADDR]] // LINUX_AIX: declare extern_weak void @_ZTH1b() [[ATTR:#[0-9]+]] diff --git a/clang/test/CodeGenCXX/cxx1y-variable-template.cpp b/clang/test/CodeGenCXX/cxx1y-variable-template.cpp index d67d12f6c7bc..23f475ed44e6 100644 --- a/clang/test/CodeGenCXX/cxx1y-variable-template.cpp +++ b/clang/test/CodeGenCXX/cxx1y-variable-template.cpp @@ -41,7 +41,7 @@ namespace PR42111 { // CHECK: icmp eq i8 {{.*}}, 0 // CHECK: br i1 // CHECK: call noundef i32 @_ZN7PR421111fEv( - // CHECK: [[N_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN7PR4211112_GLOBAL__N_11nILi0EEE) + // CHECK: [[N_ADDR:%.+]] = call align 4 i32* 
@llvm.threadlocal.address.p0i32(i32* align 4 @_ZN7PR4211112_GLOBAL__N_11nILi0EEE) // CHECK: store i32 {{.*}}, i32* [[N_ADDR]] // CHECK: store i8 1, i8* @_ZGVN7PR4211112_GLOBAL__N_11nILi0EEE int g() { return n<> + n<>; } diff --git a/clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp b/clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp index 6b511db5b98e..18e55435486f 100644 --- a/clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp +++ b/clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp @@ -31,7 +31,7 @@ int get_a() { return a; } // CHECK-LABEL: define{{.*}} i32 @_Z5get_bv() // CHECK-NOT: call -// CHECK: [[B_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @b) +// CHECK: [[B_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @b) // CHECK: load i32, i32* [[B_ADDR]] // CHECK-NOT: call // CHECK: } @@ -53,7 +53,7 @@ int get_c() { return c; } // LINUX-LABEL: define weak_odr {{.*}} @_ZTW1c() // CHECK-NOT: br i1 // CHECK-NOT: call -// CHECK: [[C_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @c) +// CHECK: [[C_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @c) // CHECK: ret i32* [[C_ADDR]] // CHECK: } diff --git a/clang/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp b/clang/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp index a571936c1a76..de2b05b05f43 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp @@ -38,7 +38,7 @@ extern inline S &f() { // CHECK-NEXT: br label %[[init_end:.*]] // CHECK: [[init_end]]: -// CHECK: [[S_ADDR:%.+]] = call %struct.S* @llvm.threadlocal.address.p0s_struct.Ss(%struct.S* @"?s@?1??f@@YAAAUS@@XZ@4U2@A") +// CHECK: [[S_ADDR:%.+]] = call align 1 %struct.S* @llvm.threadlocal.address.p0s_struct.Ss(%struct.S* align 1 @"?s@?1??f@@YAAAUS@@XZ@4U2@A") // CHECK-NEXT: ret %struct.S* [[S_ADDR]] // CHECK: [[lpad:.*]]: diff --git a/clang/test/CodeGenCXX/pr18635.cpp 
b/clang/test/CodeGenCXX/pr18635.cpp index 3cc2ff6bce90..c3e9e556155a 100644 --- a/clang/test/CodeGenCXX/pr18635.cpp +++ b/clang/test/CodeGenCXX/pr18635.cpp @@ -5,7 +5,7 @@ // returned somewhere in TLS wrapper: // CHECK: define {{.+}} ptr @_ZTW1x( -// CHECK: [[X_GLOBAL_ADDR:%[^ ]+]] = call ptr @llvm.threadlocal.address.p0(ptr [[X_GLOBAL]]) +// CHECK: [[X_GLOBAL_ADDR:%[^ ]+]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 [[X_GLOBAL]]) // CHECK: ret{{.*}}[[X_GLOBAL_ADDR]] template class unique_ptr { diff --git a/clang/test/CodeGenCXX/threadlocal_address.cpp b/clang/test/CodeGenCXX/threadlocal_address.cpp index 625db30dd0e7..f5af5c25facd 100644 --- a/clang/test/CodeGenCXX/threadlocal_address.cpp +++ b/clang/test/CodeGenCXX/threadlocal_address.cpp @@ -12,11 +12,11 @@ int g() { // // CHECK: @_Z1gv() // CHECK-NEXT: entry -// CHECK-NEXT: %[[IA:.+]] = call ptr @llvm.threadlocal.address.p0(ptr @i) +// CHECK-NEXT: %[[IA:.+]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @i) // CHECK-NEXT: %[[VA:.+]] = load i32, ptr %[[IA]] // CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[VA]], 1 // CHECK-NEXT: store i32 %[[INC]], ptr %[[IA]], align 4 -// CHECK-NEXT: %[[IA2:.+]] = call ptr @llvm.threadlocal.address.p0(ptr @i) +// CHECK-NEXT: %[[IA2:.+]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @i) // CHECK-NEXT: %[[RET:.+]] = load i32, ptr %[[IA2]], align 4 // CHECK-NEXT: ret i32 %[[RET]] // @@ -24,7 +24,7 @@ int g() { // // CHECK-O1-LABEL: @_Z1gv // CHECK-O1-NEXT: entry: -// CHECK-O1-NEXT: %[[I_ADDR:.+]] = {{.*}}call ptr @llvm.threadlocal.address.p0(ptr @i) +// CHECK-O1-NEXT: %[[I_ADDR:.+]] = {{.*}}call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @i) // CHECK-O1-NEXT: %[[VAL:.+]] = load i32, ptr %[[I_ADDR]] // CHECK-O1-NEXT: %[[INC:.+]] = add nsw i32 %[[VAL]], 1 // CHECK-O1-NEXT: store i32 %[[INC]], ptr %[[I_ADDR]] @@ -32,11 +32,11 @@ int g() { // // CHECK-NOOPAQUE-LABEL: @_Z1gv // CHECK-NOOPAQUE-NEXT: entry: -// CHECK-NOOPAQUE-NEXT: 
%[[I_ADDR:.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @i) +// CHECK-NOOPAQUE-NEXT: %[[I_ADDR:.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @i) // CHECK-NOOPAQUE-NEXT: %[[VAL:.+]] = load i32, i32* %[[I_ADDR]] // CHECK-NOOPAQUE-NEXT: %[[INC:.+]] = add nsw i32 %[[VAL]], 1 // CHECK-NOOPAQUE-NEXT: store i32 %[[INC]], i32* %[[I_ADDR]] -// CHECK-NOOPAQUE-NEXT: %[[IA2:.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @i) +// CHECK-NOOPAQUE-NEXT: %[[IA2:.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @i) // CHECK-NOOPAQUE-NEXT: %[[RET:.+]] = load i32, i32* %[[IA2]], align 4 // CHECK-NOOPAQUE-NEXT: ret i32 %[[RET]] int f() { @@ -46,17 +46,17 @@ int f() { } // CHECK: @_Z1fv() // CHECK-NEXT: entry -// CHECK-NEXT: %[[JA:.+]] = call ptr @llvm.threadlocal.address.p0(ptr @_ZZ1fvE1j) +// CHECK-NEXT: %[[JA:.+]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ1fvE1j) // CHECK-NEXT: %[[VA:.+]] = load i32, ptr %[[JA]] // CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[VA]], 1 // CHECK-NEXT: store i32 %[[INC]], ptr %[[JA]], align 4 -// CHECK-NEXT: %[[JA2:.+]] = call ptr @llvm.threadlocal.address.p0(ptr @_ZZ1fvE1j) +// CHECK-NEXT: %[[JA2:.+]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ1fvE1j) // CHECK-NEXT: %[[RET:.+]] = load i32, ptr %[[JA2]], align 4 // CHECK-NEXT: ret i32 %[[RET]] // // CHECK-O1-LABEL: @_Z1fv // CHECK-O1-NEXT: entry: -// CHECK-O1-NEXT: %[[J_ADDR:.+]] = {{.*}}call ptr @llvm.threadlocal.address.p0(ptr @_ZZ1fvE1j) +// CHECK-O1-NEXT: %[[J_ADDR:.+]] = {{.*}}call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ1fvE1j) // CHECK-O1-NEXT: %[[VAL:.+]] = load i32, ptr %[[J_ADDR]] // CHECK-O1-NEXT: %[[INC:.+]] = add nsw i32 %[[VAL]], 1 // CHECK-O1-NEXT: store i32 %[[INC]], ptr %[[J_ADDR]] @@ -64,11 +64,11 @@ int f() { // // CHECK-NOOPAQUE: @_Z1fv() // CHECK-NOOPAQUE-NEXT: entry -// CHECK-NOOPAQUE-NEXT: %[[JA:.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ1fvE1j) +// 
CHECK-NOOPAQUE-NEXT: %[[JA:.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZZ1fvE1j) // CHECK-NOOPAQUE-NEXT: %[[VA:.+]] = load i32, i32* %[[JA]] // CHECK-NOOPAQUE-NEXT: %[[INC:.+]] = add nsw i32 %[[VA]], 1 // CHECK-NOOPAQUE-NEXT: store i32 %[[INC]], i32* %[[JA]], align 4 -// CHECK-NOOPAQUE-NEXT: %[[JA2:.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ1fvE1j) +// CHECK-NOOPAQUE-NEXT: %[[JA2:.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZZ1fvE1j) // CHECK-NOOPAQUE-NEXT: %[[RET:.+]] = load i32, i32* %[[JA2]], align 4 // CHECK-NOOPAQUE-NEXT: ret i32 %[[RET]] // diff --git a/clang/test/Modules/initializers.cpp b/clang/test/Modules/initializers.cpp index 06d710813618..435d48d12dfa 100644 --- a/clang/test/Modules/initializers.cpp +++ b/clang/test/Modules/initializers.cpp @@ -174,12 +174,12 @@ inline void use(bool b, ...) { // CHECK: define {{.*}} @[[G_INIT:__cxx_global.*]]() // CHECK: load {{.*}} (i64* @_ZGV -// CHECK: [[G_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[G]]) +// CHECK: [[G_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @[[G]]) // CHECK: store {{.*}}, i32* [[G_ADDR]] // CHECK: define {{.*}} @[[H_INIT:__cxx_global.*]]() // CHECK: load {{.*}} (i64* @_ZGV -// CHECK: [[H_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[H]]) +// CHECK: [[H_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @[[H]]) // CHECK: store {{.*}}, i32* [[H_ADDR]], // FIXME: Should this use __cxa_guard_acquire? @@ -189,7 +189,7 @@ inline void use(bool b, ...) { // CHECK: define {{.*}} @[[XC_INIT:__cxx_global.*]]() // CHECK: load {{.*}} (i64* @_ZGV -// CHECK: [[XC_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[XC]]) +// CHECK: [[XC_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @[[XC]]) // CHECK: store {{.*}}, i32* [[XC_ADDR]], // FIXME: Should this use __cxa_guard_acquire? 
@@ -199,12 +199,12 @@ inline void use(bool b, ...) { // CHECK: define {{.*}} @[[XG_INIT:__cxx_global.*]]() // CHECK: load {{.*}} (i64* @_ZGV -// CHECK: [[XG_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[XG]]) +// CHECK: [[XG_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @[[XG]]) // CHECK: store {{.*}}, i32* [[XG_ADDR]], // CHECK: define {{.*}} @[[XH_INIT:__cxx_global.*]]() // CHECK: load {{.*}} (i64* @_ZGV -// CHECK: [[XH_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[XH]]) +// CHECK: [[XH_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @[[XH]]) // CHECK: store {{.*}}, i32* [[XH_ADDR]], // FIXME: Should this use __cxa_guard_acquire? @@ -214,7 +214,7 @@ inline void use(bool b, ...) { // CHECK: define {{.*}} @[[XD_INIT:__cxx_global.*]]() // CHECK: load {{.*}} (i64* @_ZGV -// CHECK: [[XD_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[XD]]) +// CHECK: [[XD_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @[[XD]]) // CHECK: store {{.*}}, i32* [[XD_ADDR]], // FIXME: Should this use __cxa_guard_acquire? @@ -232,12 +232,12 @@ inline void use(bool b, ...) 
{ // CHECK-IMPORT: define {{.*}} @[[C_INIT:__cxx_global.*]]() // CHECK-IMPORT: call noundef i32 @_Z11non_trivialv( -// CHECK-IMPORT: [[C_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[C]]) +// CHECK-IMPORT: [[C_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @[[C]]) // CHECK-IMPORT: store {{.*}}, i32* [[C_ADDR]], // CHECK-IMPORT: define {{.*}} @[[D_INIT:__cxx_global.*]]() // CHECK-IMPORT: load {{.*}} (i64* @_ZGV -// CHECK-IMPORT: [[D_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[D]]) +// CHECK-IMPORT: [[D_ADDR:%.+]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @[[D]]) // CHECK-IMPORT: store {{.*}}, i32* [[D_ADDR]], diff --git a/clang/test/OpenMP/parallel_copyin_codegen.cpp b/clang/test/OpenMP/parallel_copyin_codegen.cpp index 4981f1778bd4..b93c1a0e4d70 100644 --- a/clang/test/OpenMP/parallel_copyin_codegen.cpp +++ b/clang/test/OpenMP/parallel_copyin_codegen.cpp @@ -1106,12 +1106,12 @@ void foo() { // CHECK11-NEXT: store i8 1, i8* @_ZGVZ4mainE3var, align 1 // CHECK11-NEXT: br label [[INIT_END3]] // CHECK11: init.end3: -// CHECK11-NEXT: [[TMP4:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ4mainE5t_var) -// CHECK11-NEXT: [[TMP5:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ4mainE3vec) -// CHECK11-NEXT: [[TMP6:%.*]] = call [2 x %struct.S]* @llvm.threadlocal.address.p0a2s_struct.Ss([2 x %struct.S]* @_ZZ4mainE5s_arr) -// CHECK11-NEXT: [[TMP7:%.*]] = call %struct.S* @llvm.threadlocal.address.p0s_struct.Ss(%struct.S* @_ZZ4mainE3var) +// CHECK11-NEXT: [[TMP4:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP5:%.*]] = call align 4 [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* align 4 @_ZZ4mainE3vec) +// CHECK11-NEXT: [[TMP6:%.*]] = call align 4 [2 x %struct.S]* @llvm.threadlocal.address.p0a2s_struct.Ss([2 x %struct.S]* align 4 @_ZZ4mainE5s_arr) +// CHECK11-NEXT: [[TMP7:%.*]] = call 
align 4 %struct.S* @llvm.threadlocal.address.p0s_struct.Ss(%struct.S* align 4 @_ZZ4mainE3var) // CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP4]], [2 x i32]* [[TMP5]], [2 x %struct.S]* [[TMP6]], %struct.S* [[TMP7]]) -// CHECK11-NEXT: [[TMP8:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP8:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZZ4mainE5t_var) // CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[TMP8]]) // CHECK11-NEXT: [[CALL4:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK11-NEXT: store i32 [[CALL4]], i32* [[RETVAL]], align 4 @@ -1199,7 +1199,7 @@ void foo() { // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 // CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP4:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZZ4mainE5t_var) // CHECK11-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP0]] to i64 // CHECK11-NEXT: [[TMP6:%.*]] = ptrtoint i32* [[TMP4]] to i64 // CHECK11-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP5]], [[TMP6]] @@ -1207,11 +1207,11 @@ void foo() { // CHECK11: copyin.not.master: // CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK11-NEXT: store i32 [[TMP8]], i32* [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = call [2 x 
i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ4mainE3vec) +// CHECK11-NEXT: [[TMP9:%.*]] = call align 4 [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* align 4 @_ZZ4mainE3vec) // CHECK11-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP9]] to i8* // CHECK11-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i64 8, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = call [2 x %struct.S]* @llvm.threadlocal.address.p0a2s_struct.Ss([2 x %struct.S]* @_ZZ4mainE5s_arr) +// CHECK11-NEXT: [[TMP12:%.*]] = call align 4 [2 x %struct.S]* @llvm.threadlocal.address.p0a2s_struct.Ss([2 x %struct.S]* align 4 @_ZZ4mainE5s_arr) // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i32 0, i32 0 // CHECK11-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* // CHECK11-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 @@ -1226,20 +1226,20 @@ void foo() { // CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK11: omp.arraycpy.done1: -// CHECK11-NEXT: [[TMP15:%.*]] = call %struct.S* @llvm.threadlocal.address.p0s_struct.Ss(%struct.S* @_ZZ4mainE3var) +// CHECK11-NEXT: [[TMP15:%.*]] = call align 4 %struct.S* @llvm.threadlocal.address.p0s_struct.Ss(%struct.S* align 4 @_ZZ4mainE3var) // CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]]) // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: // CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: [[TMP17:%.*]] = 
load i32, i32* [[TMP16]], align 4 // CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP17]]) -// CHECK11-NEXT: [[TMP18:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP18:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZZ4mainE5t_var) // CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ4mainE3vec) +// CHECK11-NEXT: [[TMP20:%.*]] = call align 4 [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* align 4 @_ZZ4mainE3vec) // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP20]], i64 0, i64 0 // CHECK11-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = call %struct.S* @llvm.threadlocal.address.p0s_struct.Ss(%struct.S* @_ZZ4mainE3var) -// CHECK11-NEXT: [[TMP22:%.*]] = call [2 x %struct.S]* @llvm.threadlocal.address.p0a2s_struct.Ss([2 x %struct.S]* @_ZZ4mainE5s_arr) +// CHECK11-NEXT: [[TMP21:%.*]] = call align 4 %struct.S* @llvm.threadlocal.address.p0s_struct.Ss(%struct.S* align 4 @_ZZ4mainE3var) +// CHECK11-NEXT: [[TMP22:%.*]] = call align 4 [2 x %struct.S]* @llvm.threadlocal.address.p0a2s_struct.Ss([2 x %struct.S]* align 4 @_ZZ4mainE5s_arr) // CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP22]], i64 0, i64 0 // CHECK11-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP21]]) // CHECK11-NEXT: ret void @@ -1255,7 +1255,7 @@ void foo() { // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 
-// CHECK11-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZZ4mainE5t_var) // CHECK11-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP0]] to i64 // CHECK11-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 // CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] @@ -1268,7 +1268,7 @@ void foo() { // CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 // CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) -// CHECK11-NEXT: [[TMP8:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP8:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZZ4mainE5t_var) // CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK11-NEXT: store i32 [[INC]], i32* [[TMP8]], align 4 @@ -1303,12 +1303,12 @@ void foo() { // CHECK11-NEXT: store i8 1, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE3var to i8*), align 8 // CHECK11-NEXT: br label [[INIT_END3]] // CHECK11: init.end3: -// CHECK11-NEXT: [[TMP4:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ5tmainIiET_vE5t_var) -// CHECK11-NEXT: [[TMP5:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ5tmainIiET_vE3vec) -// CHECK11-NEXT: [[TMP6:%.*]] = call [2 x %struct.S.0]* @llvm.threadlocal.address.p0a2s_struct.S.0s([2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr) -// CHECK11-NEXT: [[TMP7:%.*]] = call %struct.S.0* @llvm.threadlocal.address.p0s_struct.S.0s(%struct.S.0* @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: [[TMP4:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP5:%.*]] = call align 128 [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* align 128 
@_ZZ5tmainIiET_vE3vec) +// CHECK11-NEXT: [[TMP6:%.*]] = call align 128 [2 x %struct.S.0]* @llvm.threadlocal.address.p0a2s_struct.S.0s([2 x %struct.S.0]* align 128 @_ZZ5tmainIiET_vE5s_arr) +// CHECK11-NEXT: [[TMP7:%.*]] = call align 128 %struct.S.0* @llvm.threadlocal.address.p0s_struct.S.0s(%struct.S.0* align 128 @_ZZ5tmainIiET_vE3var) // CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[TMP4]], [2 x i32]* [[TMP5]], [2 x %struct.S.0]* [[TMP6]], %struct.S.0* [[TMP7]]) -// CHECK11-NEXT: [[TMP8:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP8:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @_ZZ5tmainIiET_vE5t_var) // CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[TMP8]]) // CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: ret i32 0 @@ -1321,7 +1321,7 @@ void foo() { // CHECK11-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK11-NEXT: [[TMP0:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[TMP0]], align 128 // CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float // CHECK11-NEXT: store float [[CONV]], float* [[F]], align 4 @@ -1347,7 +1347,7 @@ void foo() { // CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK11-NEXT: [[TMP1:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK11-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP1]], align 128 // CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP2]] to float // CHECK11-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]] @@ -1434,7 +1434,7 @@ void foo() { // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 // CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 
-// CHECK11-NEXT: [[TMP4:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP4:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @_ZZ5tmainIiET_vE5t_var) // CHECK11-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP0]] to i64 // CHECK11-NEXT: [[TMP6:%.*]] = ptrtoint i32* [[TMP4]] to i64 // CHECK11-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP5]], [[TMP6]] @@ -1442,11 +1442,11 @@ void foo() { // CHECK11: copyin.not.master: // CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK11-NEXT: store i32 [[TMP8]], i32* [[TMP4]], align 128 -// CHECK11-NEXT: [[TMP9:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ5tmainIiET_vE3vec) +// CHECK11-NEXT: [[TMP9:%.*]] = call align 128 [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* align 128 @_ZZ5tmainIiET_vE3vec) // CHECK11-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP9]] to i8* // CHECK11-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP10]], i8* align 128 [[TMP11]], i64 8, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = call [2 x %struct.S.0]* @llvm.threadlocal.address.p0a2s_struct.S.0s([2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr) +// CHECK11-NEXT: [[TMP12:%.*]] = call align 128 [2 x %struct.S.0]* @llvm.threadlocal.address.p0a2s_struct.S.0s([2 x %struct.S.0]* align 128 @_ZZ5tmainIiET_vE5s_arr) // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP12]], i32 0, i32 0 // CHECK11-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* // CHECK11-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 @@ -1461,20 +1461,20 @@ void foo() { // CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // 
CHECK11: omp.arraycpy.done1: -// CHECK11-NEXT: [[TMP15:%.*]] = call %struct.S.0* @llvm.threadlocal.address.p0s_struct.S.0s(%struct.S.0* @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: [[TMP15:%.*]] = call align 128 %struct.S.0* @llvm.threadlocal.address.p0s_struct.S.0s(%struct.S.0* align 128 @_ZZ5tmainIiET_vE3var) // CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP3]]) // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: // CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 // CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) -// CHECK11-NEXT: [[TMP18:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP18:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @_ZZ5tmainIiET_vE5t_var) // CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 128 -// CHECK11-NEXT: [[TMP20:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ5tmainIiET_vE3vec) +// CHECK11-NEXT: [[TMP20:%.*]] = call align 128 [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* align 128 @_ZZ5tmainIiET_vE3vec) // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP20]], i64 0, i64 0 // CHECK11-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 128 -// CHECK11-NEXT: [[TMP21:%.*]] = call %struct.S.0* @llvm.threadlocal.address.p0s_struct.S.0s(%struct.S.0* @_ZZ5tmainIiET_vE3var) -// CHECK11-NEXT: [[TMP22:%.*]] = call [2 x %struct.S.0]* @llvm.threadlocal.address.p0a2s_struct.S.0s([2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr) +// CHECK11-NEXT: [[TMP21:%.*]] = call align 128 %struct.S.0* @llvm.threadlocal.address.p0s_struct.S.0s(%struct.S.0* 
align 128 @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: [[TMP22:%.*]] = call align 128 [2 x %struct.S.0]* @llvm.threadlocal.address.p0a2s_struct.S.0s([2 x %struct.S.0]* align 128 @_ZZ5tmainIiET_vE5s_arr) // CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP22]], i64 0, i64 0 // CHECK11-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP21]]) // CHECK11-NEXT: ret void @@ -1490,7 +1490,7 @@ void foo() { // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP1:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @_ZZ5tmainIiET_vE5t_var) // CHECK11-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP0]] to i64 // CHECK11-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 // CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] @@ -1513,7 +1513,7 @@ void foo() { // CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK11-NEXT: [[TMP0:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[TMP0]], align 128 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[F]], align 4 // CHECK11-NEXT: ret void @@ -1538,7 +1538,7 @@ void foo() { // CHECK11-NEXT: 
[[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK11-NEXT: [[TMP1:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK11-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP1]], align 128 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP2]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[F]], align 4 @@ -1547,7 +1547,7 @@ void foo() { // // CHECK11-LABEL: define {{[^@]+}}@_ZTW1g // CHECK11-SAME: () #[[ATTR9:[0-9]+]] comdat { -// CHECK11-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK11-NEXT: [[TMP1:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK11-NEXT: ret i32* [[TMP1]] // // @@ -1572,7 +1572,7 @@ void foo() { // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK13-NEXT: [[TMP1:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK13-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP0]] to i64 // CHECK13-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 // CHECK13-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] @@ -1585,7 +1585,7 @@ void foo() { // CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 // CHECK13-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]]) -// CHECK13-NEXT: [[TMP8:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK13-NEXT: 
[[TMP8:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK13-NEXT: store volatile i32 1, i32* [[TMP8]], align 128 // CHECK13-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) // CHECK13-NEXT: ret void @@ -1593,7 +1593,7 @@ void foo() { // // CHECK13-LABEL: define {{[^@]+}}@_ZTW1g // CHECK13-SAME: () #[[ATTR6:[0-9]+]] comdat { -// CHECK13-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK13-NEXT: [[TMP1:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK13-NEXT: ret i32* [[TMP1]] // // @@ -1616,7 +1616,7 @@ void foo() { // CHECK14-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK14-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK14-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK14-NEXT: [[TMP0:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i32* [[TMP0]]) // CHECK14-NEXT: ret void // @@ -1631,7 +1631,7 @@ void foo() { // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK14-NEXT: [[TMP1:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK14-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP0]] to i64 // CHECK14-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 // CHECK14-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] @@ -1644,7 +1644,7 @@ void foo() { // CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 // CHECK14-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]]) -// CHECK14-NEXT: [[TMP8:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK14-NEXT: [[TMP8:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK14-NEXT: store volatile i32 1, i32* [[TMP8]], align 128 // CHECK14-NEXT: [[TMP9:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to %struct.__block_literal_generic*), i32 0, i32 3), align 8 // CHECK14-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to void (i8*)* @@ -1660,14 +1660,14 @@ void foo() { // CHECK14-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK14-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK14-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** 
[[BLOCK_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK14-NEXT: [[TMP0:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK14-NEXT: store volatile i32 2, i32* [[TMP0]], align 128 // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@_ZTW1g // CHECK14-SAME: () #[[ATTR7:[0-9]+]] comdat { -// CHECK14-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK14-NEXT: [[TMP1:%.*]] = call align 128 i32* @llvm.threadlocal.address.p0i32(i32* align 128 @g) // CHECK14-NEXT: ret i32* [[TMP1]] // // @@ -1690,8 +1690,8 @@ void foo() { // CHECK15-NEXT: store i8 1, i8* @_ZGVZ10array_funcvE1s, align 1 // CHECK15-NEXT: br label [[INIT_END]] // CHECK15: init.end: -// CHECK15-NEXT: [[TMP2:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ10array_funcvE1a) -// CHECK15-NEXT: [[TMP3:%.*]] = call [2 x %struct.St]* @llvm.threadlocal.address.p0a2s_struct.Sts([2 x %struct.St]* @_ZZ10array_funcvE1s) +// CHECK15-NEXT: [[TMP2:%.*]] = call align 4 [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* align 4 @_ZZ10array_funcvE1a) +// CHECK15-NEXT: [[TMP3:%.*]] = call align 16 [2 x %struct.St]* @llvm.threadlocal.address.p0a2s_struct.Sts([2 x %struct.St]* align 16 @_ZZ10array_funcvE1s) // CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, [2 x %struct.St]*)* @.omp_outlined. 
to void (i32*, i32*, ...)*), [2 x i32]* [[TMP2]], [2 x %struct.St]* [[TMP3]]) // CHECK15-NEXT: ret void // @@ -1745,7 +1745,7 @@ void foo() { // CHECK15-NEXT: store [2 x %struct.St]* [[S]], [2 x %struct.St]** [[S_ADDR]], align 8 // CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 8 // CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.St]*, [2 x %struct.St]** [[S_ADDR]], align 8 -// CHECK15-NEXT: [[TMP2:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ10array_funcvE1a) +// CHECK15-NEXT: [[TMP2:%.*]] = call align 4 [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* align 4 @_ZZ10array_funcvE1a) // CHECK15-NEXT: [[TMP3:%.*]] = ptrtoint [2 x i32]* [[TMP0]] to i64 // CHECK15-NEXT: [[TMP4:%.*]] = ptrtoint [2 x i32]* [[TMP2]] to i64 // CHECK15-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] @@ -1754,7 +1754,7 @@ void foo() { // CHECK15-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* // CHECK15-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 8, i1 false) -// CHECK15-NEXT: [[TMP8:%.*]] = call [2 x %struct.St]* @llvm.threadlocal.address.p0a2s_struct.Sts([2 x %struct.St]* @_ZZ10array_funcvE1s) +// CHECK15-NEXT: [[TMP8:%.*]] = call align 16 [2 x %struct.St]* @llvm.threadlocal.address.p0a2s_struct.Sts([2 x %struct.St]* align 16 @_ZZ10array_funcvE1s) // CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.St], [2 x %struct.St]* [[TMP8]], i32 0, i32 0 // CHECK15-NEXT: [[TMP9:%.*]] = bitcast [2 x %struct.St]* [[TMP1]] to %struct.St* // CHECK15-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_ST:%.*]], %struct.St* [[ARRAY_BEGIN]], i64 2 @@ -1814,7 +1814,7 @@ void foo() { // CHECK16-SAME: () #[[ATTR0:[0-9]+]] section ".text.startup" { // CHECK16-NEXT: entry: // CHECK16-NEXT: [[CALL:%.*]] = call noundef i32 @_Z6t_initv() -// CHECK16-NEXT: [[TMP0:%.*]] = call i32* 
@llvm.threadlocal.address.p0i32(i32* @t) +// CHECK16-NEXT: [[TMP0:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @t) // CHECK16-NEXT: store i32 [[CALL]], i32* [[TMP0]], align 4 // CHECK16-NEXT: ret void // @@ -1871,7 +1871,7 @@ void foo() { // CHECK16-LABEL: define {{[^@]+}}@_ZTW1t // CHECK16-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK16-NEXT: call void @_ZTH1t() -// CHECK16-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @t) +// CHECK16-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @t) // CHECK16-NEXT: ret i32* [[TMP1]] // // diff --git a/clang/test/OpenMP/parallel_copyin_combined_codegen.c b/clang/test/OpenMP/parallel_copyin_combined_codegen.c index 20283f3693ac..2912cd4c4503 100644 --- a/clang/test/OpenMP/parallel_copyin_combined_codegen.c +++ b/clang/test/OpenMP/parallel_copyin_combined_codegen.c @@ -57,9 +57,9 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: store i32 1, i32* [[TMP0]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32**, i32*)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i32** [[A_ADDR]], i32* [[TMP1]]) // CHECK-NEXT: ret void // @@ -83,7 +83,7 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP2:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 // CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP2]] to i64 // CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] @@ -128,7 +128,7 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP18:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 // CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP19]] // CHECK-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP0]], align 8 @@ -160,9 +160,9 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: store i32 2, i32* [[TMP0]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, 
...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32**, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32** [[A_ADDR]], i32* [[TMP1]]) // CHECK-NEXT: ret void // @@ -186,7 +186,7 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP2:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 // CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP2]] to i64 // CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] @@ -231,7 +231,7 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP18:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 // CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP19]] // CHECK-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP0]], align 8 @@ -260,9 +260,9 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: store i32 3, i32* [[TMP0]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// 
CHECK-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32**, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32** [[A_ADDR]], i32* [[TMP1]]) // CHECK-NEXT: ret void // @@ -286,7 +286,7 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP2:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 // CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP2]] to i64 // CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] @@ -331,7 +331,7 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group // CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group -// CHECK-NEXT: [[TMP18:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP18:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !llvm.access.group // CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP19]] // CHECK-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP0]], align 8, !llvm.access.group @@ -369,9 +369,9 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32* 
@llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: store i32 4, i32* [[TMP0]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32**, i32**, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32** [[A_ADDR]], i32** [[B_ADDR]], i32* [[TMP1]]) // CHECK-NEXT: ret void // @@ -396,7 +396,7 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP3:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP2]] to i64 // CHECK-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP3]] to i64 // CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] @@ -435,13 +435,13 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK-NEXT: ] // CHECK: .omp.sections.case: -// CHECK-NEXT: [[TMP19:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP19:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 // CHECK-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP0]], align 8 // CHECK-NEXT: store i32 [[TMP20]], i32* [[TMP21]], align 4 // CHECK-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK: .omp.sections.case1: -// CHECK-NEXT: [[TMP22:%.*]] = 
call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP22:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 // CHECK-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK-NEXT: store i32 [[TMP23]], i32* [[TMP24]], align 4 @@ -464,9 +464,9 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: store i32 5, i32* [[TMP0]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32**, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32** [[A_ADDR]], i32* [[TMP1]]) // CHECK-NEXT: ret void // @@ -484,7 +484,7 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP2:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 // CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP2]] to i64 // CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] @@ -511,7 +511,7 @@ void test_omp_parallel_master_copyin(int *a) { // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK: for.body: // CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 -// CHECK-NEXT: [[TMP15:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK-NEXT: [[TMP15:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP16]] // CHECK-NEXT: [[TMP17:%.*]] = load i32*, i32** [[TMP0]], align 8 diff --git a/clang/test/OpenMP/parallel_master_codegen.cpp b/clang/test/OpenMP/parallel_master_codegen.cpp index 1fb9e62066e1..9d3c0c2ce64f 100644 --- a/clang/test/OpenMP/parallel_master_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_codegen.cpp @@ -629,7 +629,7 @@ void parallel_master_allocate() { // CHECK29-LABEL: define {{[^@]+}}@_Z22parallel_master_copyinv // CHECK29-SAME: () #[[ATTR0:[0-9]+]] { // CHECK29-NEXT: entry: -// CHECK29-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @a) +// CHECK29-NEXT: 
[[TMP0:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @a) // CHECK29-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) // CHECK29-NEXT: ret void // @@ -644,7 +644,7 @@ void parallel_master_allocate() { // CHECK29-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK29-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK29-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK29-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @a) +// CHECK29-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @a) // CHECK29-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP0]] to i64 // CHECK29-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 // CHECK29-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] @@ -663,7 +663,7 @@ void parallel_master_allocate() { // CHECK29-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK29-NEXT: br i1 [[TMP11]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK29: omp_if.then: -// CHECK29-NEXT: [[TMP12:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @a) +// CHECK29-NEXT: [[TMP12:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @a) // CHECK29-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 // CHECK29-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK29-NEXT: store i32 [[INC]], i32* [[TMP12]], align 4 @@ -675,6 +675,6 @@ void parallel_master_allocate() { // // CHECK29-LABEL: define {{[^@]+}}@_ZTW1a // CHECK29-SAME: () #[[ATTR5:[0-9]+]] comdat { -// CHECK29-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @a) +// CHECK29-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @a) // CHECK29-NEXT: ret i32* [[TMP1]] // diff --git 
a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp index 80256bd85b21..782b28a01ee8 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp @@ -103,7 +103,7 @@ int main() { // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK1-NEXT: [[TMP0:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 @@ -325,7 +325,7 @@ int main() { // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK1-NEXT: [[TMP0:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 @@ -539,7 +539,7 @@ int main() { // // CHECK1-LABEL: define {{[^@]+}}@_ZTW1x // CHECK1-SAME: () #[[ATTR5:[0-9]+]] comdat { -// CHECK1-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK1-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK1-NEXT: ret i32* [[TMP1]] // // @@ -561,7 +561,7 @@ int main() { // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK3-NEXT: [[TMP0:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[X_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_CASTED]], align 4 @@ -776,7 +776,7 @@ int main() { // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK3-NEXT: [[TMP0:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[X_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_CASTED]], align 4 @@ -983,7 +983,7 @@ int main() { // // CHECK3-LABEL: define {{[^@]+}}@_ZTW1x // CHECK3-SAME: () #[[ATTR5:[0-9]+]] comdat { -// CHECK3-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK3-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK3-NEXT: ret i32* [[TMP1]] // // @@ -1177,7 +1177,7 @@ int main() { // // CHECK9-LABEL: define {{[^@]+}}@_ZTW1x // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { -// CHECK9-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK9-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @x) // CHECK9-NEXT: ret i32* [[TMP1]] // // diff --git a/clang/test/OpenMP/threadprivate_codegen.cpp b/clang/test/OpenMP/threadprivate_codegen.cpp index ff3fec914c2e..53a6e38a30a9 100644 --- a/clang/test/OpenMP/threadprivate_codegen.cpp +++ b/clang/test/OpenMP/threadprivate_codegen.cpp @@ -3662,7 +3662,7 @@ int foobar() { 
// CHECK-TLS1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0 // CHECK-TLS1-NEXT: [[TMP5:%.*]] = load i32, i32* [[A1]], align 4 // CHECK-TLS1-NEXT: store i32 [[TMP5]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP6:%.*]] = call %struct.Smain* @llvm.threadlocal.address.p0s_struct.Smains(%struct.Smain* @_ZZ4mainE2sm) +// CHECK-TLS1-NEXT: [[TMP6:%.*]] = call align 8 %struct.Smain* @llvm.threadlocal.address.p0s_struct.Smains(%struct.Smain* align 8 @_ZZ4mainE2sm) // CHECK-TLS1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[TMP6]], i32 0, i32 0 // CHECK-TLS1-NEXT: [[TMP7:%.*]] = load i32, i32* [[A2]], align 8 // CHECK-TLS1-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4 @@ -3692,12 +3692,12 @@ int foobar() { // CHECK-TLS1-NEXT: [[TMP19:%.*]] = load i32, i32* [[RES]], align 4 // CHECK-TLS1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK-TLS1-NEXT: store i32 [[ADD10]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP20:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN2STIiE2stE) +// CHECK-TLS1-NEXT: [[TMP20:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN2STIiE2stE) // CHECK-TLS1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 // CHECK-TLS1-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4 // CHECK-TLS1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP21]] // CHECK-TLS1-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP23:%.*]] = call float* @llvm.threadlocal.address.p0f32(float* @_ZN2STIfE2stE) +// CHECK-TLS1-NEXT: [[TMP23:%.*]] = call align 4 float* @llvm.threadlocal.address.p0f32(float* align 4 @_ZN2STIfE2stE) // CHECK-TLS1-NEXT: [[TMP24:%.*]] = load float, float* [[TMP23]], align 4 // CHECK-TLS1-NEXT: [[CONV:%.*]] = fptosi float [[TMP24]] to i32 // CHECK-TLS1-NEXT: [[TMP25:%.*]] = load i32, i32* [[RES]], align 4 @@ -3716,7 +3716,7 @@ int foobar() { // CHECK-TLS1-LABEL: define 
{{[^@]+}}@_ZTWL3gs1 // CHECK-TLS1-SAME: () #[[ATTR5:[0-9]+]] { // CHECK-TLS1-NEXT: call void @_ZTHL3gs1() -// CHECK-TLS1-NEXT: [[TMP1:%.*]] = call %struct.S1* @llvm.threadlocal.address.p0s_struct.S1s(%struct.S1* @_ZL3gs1) +// CHECK-TLS1-NEXT: [[TMP1:%.*]] = call align 4 %struct.S1* @llvm.threadlocal.address.p0s_struct.S1s(%struct.S1* align 4 @_ZL3gs1) // CHECK-TLS1-NEXT: ret %struct.S1* [[TMP1]] // // @@ -3750,7 +3750,7 @@ int foobar() { // CHECK-TLS1-NEXT: call void @_ZTHN6Static1sE() // CHECK-TLS1-NEXT: br label [[TMP2]] // CHECK-TLS1: 2: -// CHECK-TLS1-NEXT: [[TMP3:%.*]] = call %struct.S3* @llvm.threadlocal.address.p0s_struct.S3s(%struct.S3* @_ZN6Static1sE) +// CHECK-TLS1-NEXT: [[TMP3:%.*]] = call align 4 %struct.S3* @llvm.threadlocal.address.p0s_struct.S3s(%struct.S3* align 4 @_ZN6Static1sE) // CHECK-TLS1-NEXT: ret %struct.S3* [[TMP3]] // // @@ -3761,21 +3761,21 @@ int foobar() { // CHECK-TLS1-NEXT: call void @_ZTH3gs3() // CHECK-TLS1-NEXT: br label [[TMP2]] // CHECK-TLS1: 2: -// CHECK-TLS1-NEXT: [[TMP3:%.*]] = call %struct.S5* @llvm.threadlocal.address.p0s_struct.S5s(%struct.S5* @gs3) +// CHECK-TLS1-NEXT: [[TMP3:%.*]] = call align 4 %struct.S5* @llvm.threadlocal.address.p0s_struct.S5s(%struct.S5* align 4 @gs3) // CHECK-TLS1-NEXT: ret %struct.S5* [[TMP3]] // // // CHECK-TLS1-LABEL: define {{[^@]+}}@_ZTW5arr_x // CHECK-TLS1-SAME: () #[[ATTR5]] comdat { // CHECK-TLS1-NEXT: call void @_ZTH5arr_x() -// CHECK-TLS1-NEXT: [[TMP1:%.*]] = call [2 x [3 x %struct.S1]]* @llvm.threadlocal.address.p0a2a3s_struct.S1s([2 x [3 x %struct.S1]]* @arr_x) +// CHECK-TLS1-NEXT: [[TMP1:%.*]] = call align 16 [2 x [3 x %struct.S1]]* @llvm.threadlocal.address.p0a2a3s_struct.S1s([2 x [3 x %struct.S1]]* align 16 @arr_x) // CHECK-TLS1-NEXT: ret [2 x [3 x %struct.S1]]* [[TMP1]] // // // CHECK-TLS1-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE // CHECK-TLS1-SAME: () #[[ATTR5]] comdat { // CHECK-TLS1-NEXT: call void @_ZTHN2STI2S4E2stE() -// CHECK-TLS1-NEXT: [[TMP1:%.*]] = call %struct.S4* 
@llvm.threadlocal.address.p0s_struct.S4s(%struct.S4* @_ZN2STI2S4E2stE) +// CHECK-TLS1-NEXT: [[TMP1:%.*]] = call align 4 %struct.S4* @llvm.threadlocal.address.p0s_struct.S4s(%struct.S4* align 4 @_ZN2STI2S4E2stE) // CHECK-TLS1-NEXT: ret %struct.S4* [[TMP1]] // // @@ -3836,12 +3836,12 @@ int foobar() { // CHECK-TLS1-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4 // CHECK-TLS1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] // CHECK-TLS1-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP13:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN2STIiE2stE) +// CHECK-TLS1-NEXT: [[TMP13:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN2STIiE2stE) // CHECK-TLS1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 // CHECK-TLS1-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4 // CHECK-TLS1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] // CHECK-TLS1-NEXT: store i32 [[ADD8]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP16:%.*]] = call float* @llvm.threadlocal.address.p0f32(float* @_ZN2STIfE2stE) +// CHECK-TLS1-NEXT: [[TMP16:%.*]] = call align 4 float* @llvm.threadlocal.address.p0f32(float* align 4 @_ZN2STIfE2stE) // CHECK-TLS1-NEXT: [[TMP17:%.*]] = load float, float* [[TMP16]], align 4 // CHECK-TLS1-NEXT: [[CONV:%.*]] = fptosi float [[TMP17]] to i32 // CHECK-TLS1-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4 @@ -3964,7 +3964,7 @@ int foobar() { // CHECK-TLS2-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0 // CHECK-TLS2-NEXT: [[TMP5:%.*]] = load i32, i32* [[A1]], align 4 // CHECK-TLS2-NEXT: store i32 [[TMP5]], i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[TMP6:%.*]] = call %struct.Smain* @llvm.threadlocal.address.p0s_struct.Smains(%struct.Smain* @_ZZ4mainE2sm) +// CHECK-TLS2-NEXT: [[TMP6:%.*]] = call align 8 %struct.Smain* @llvm.threadlocal.address.p0s_struct.Smains(%struct.Smain* align 8 @_ZZ4mainE2sm) // CHECK-TLS2-NEXT: [[A2:%.*]] = 
getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[TMP6]], i32 0, i32 0 // CHECK-TLS2-NEXT: [[TMP7:%.*]] = load i32, i32* [[A2]], align 8 // CHECK-TLS2-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4 @@ -4018,7 +4018,7 @@ int foobar() { // CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWL3gs1 // CHECK-TLS2-SAME: () #[[ATTR1:[0-9]+]] { // CHECK-TLS2-NEXT: call void @_ZTHL3gs1() -// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call %struct.S1* @llvm.threadlocal.address.p0s_struct.S1s(%struct.S1* @_ZL3gs1) +// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call align 4 %struct.S1* @llvm.threadlocal.address.p0s_struct.S1s(%struct.S1* align 4 @_ZL3gs1) // CHECK-TLS2-NEXT: ret %struct.S1* [[TMP1]] // // @@ -4052,7 +4052,7 @@ int foobar() { // CHECK-TLS2-NEXT: call void @_ZTHN6Static1sE() // CHECK-TLS2-NEXT: br label [[TMP2]] // CHECK-TLS2: 2: -// CHECK-TLS2-NEXT: [[TMP3:%.*]] = call %struct.S3* @llvm.threadlocal.address.p0s_struct.S3s(%struct.S3* @_ZN6Static1sE) +// CHECK-TLS2-NEXT: [[TMP3:%.*]] = call align 4 %struct.S3* @llvm.threadlocal.address.p0s_struct.S3s(%struct.S3* align 4 @_ZN6Static1sE) // CHECK-TLS2-NEXT: ret %struct.S3* [[TMP3]] // // @@ -4063,33 +4063,33 @@ int foobar() { // CHECK-TLS2-NEXT: call void @_ZTH3gs3() // CHECK-TLS2-NEXT: br label [[TMP2]] // CHECK-TLS2: 2: -// CHECK-TLS2-NEXT: [[TMP3:%.*]] = call %struct.S5* @llvm.threadlocal.address.p0s_struct.S5s(%struct.S5* @gs3) +// CHECK-TLS2-NEXT: [[TMP3:%.*]] = call align 4 %struct.S5* @llvm.threadlocal.address.p0s_struct.S5s(%struct.S5* align 4 @gs3) // CHECK-TLS2-NEXT: ret %struct.S5* [[TMP3]] // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTW5arr_x // CHECK-TLS2-SAME: () #[[ATTR1]] comdat { // CHECK-TLS2-NEXT: call void @_ZTH5arr_x() -// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call [2 x [3 x %struct.S1]]* @llvm.threadlocal.address.p0a2a3s_struct.S1s([2 x [3 x %struct.S1]]* @arr_x) +// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call align 16 [2 x [3 x %struct.S1]]* @llvm.threadlocal.address.p0a2a3s_struct.S1s([2 x [3 x %struct.S1]]* align 16 
@arr_x) // CHECK-TLS2-NEXT: ret [2 x [3 x %struct.S1]]* [[TMP1]] // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWN2STIiE2stE // CHECK-TLS2-SAME: () #[[ATTR1]] comdat { -// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN2STIiE2stE) +// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN2STIiE2stE) // CHECK-TLS2-NEXT: ret i32* [[TMP1]] // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWN2STIfE2stE // CHECK-TLS2-SAME: () #[[ATTR1]] comdat { -// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call float* @llvm.threadlocal.address.p0f32(float* @_ZN2STIfE2stE) +// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call align 4 float* @llvm.threadlocal.address.p0f32(float* align 4 @_ZN2STIfE2stE) // CHECK-TLS2-NEXT: ret float* [[TMP1]] // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE // CHECK-TLS2-SAME: () #[[ATTR1]] comdat { // CHECK-TLS2-NEXT: call void @_ZTHN2STI2S4E2stE() -// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call %struct.S4* @llvm.threadlocal.address.p0s_struct.S4s(%struct.S4* @_ZN2STI2S4E2stE) +// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call align 4 %struct.S4* @llvm.threadlocal.address.p0s_struct.S4s(%struct.S4* align 4 @_ZN2STI2S4E2stE) // CHECK-TLS2-NEXT: ret %struct.S4* [[TMP1]] // // @@ -4734,7 +4734,7 @@ int foobar() { // CHECK-TLS3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0, !dbg [[DBG212:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP5:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG212]] // CHECK-TLS3-NEXT: store i32 [[TMP5]], i32* [[RES]], align 4, !dbg [[DBG213:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP6:%.*]] = call %struct.Smain* @llvm.threadlocal.address.p0s_struct.Smains(%struct.Smain* @_ZZ4mainE2sm), !dbg [[DBG214:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP6:%.*]] = call align 8 %struct.Smain* @llvm.threadlocal.address.p0s_struct.Smains(%struct.Smain* align 8 @_ZZ4mainE2sm), !dbg [[DBG214:![0-9]+]] // CHECK-TLS3-NEXT: [[A2:%.*]] = getelementptr inbounds 
[[STRUCT_SMAIN:%.*]], %struct.Smain* [[TMP6]], i32 0, i32 0, !dbg [[DBG215:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP7:%.*]] = load i32, i32* [[A2]], align 8, !dbg [[DBG215]] // CHECK-TLS3-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG216:![0-9]+]] @@ -4764,12 +4764,12 @@ int foobar() { // CHECK-TLS3-NEXT: [[TMP19:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG227:![0-9]+]] // CHECK-TLS3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], [[TMP18]], !dbg [[DBG227]] // CHECK-TLS3-NEXT: store i32 [[ADD10]], i32* [[RES]], align 4, !dbg [[DBG227]] -// CHECK-TLS3-NEXT: [[TMP20:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN2STIiE2stE), !dbg [[DBG228:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP20:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN2STIiE2stE), !dbg [[DBG228:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4, !dbg [[DBG228]] // CHECK-TLS3-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG229:![0-9]+]] // CHECK-TLS3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP21]], !dbg [[DBG229]] // CHECK-TLS3-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG229]] -// CHECK-TLS3-NEXT: [[TMP23:%.*]] = call float* @llvm.threadlocal.address.p0f32(float* @_ZN2STIfE2stE), !dbg [[DBG230:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP23:%.*]] = call align 4 float* @llvm.threadlocal.address.p0f32(float* align 4 @_ZN2STIfE2stE), !dbg [[DBG230:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP24:%.*]] = load float, float* [[TMP23]], align 4, !dbg [[DBG230]] // CHECK-TLS3-NEXT: [[CONV:%.*]] = fptosi float [[TMP24]] to i32, !dbg [[DBG230]] // CHECK-TLS3-NEXT: [[TMP25:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG231:![0-9]+]] @@ -4788,7 +4788,7 @@ int foobar() { // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTWL3gs1 // CHECK-TLS3-SAME: () #[[ATTR6:[0-9]+]] { // CHECK-TLS3-NEXT: call void @_ZTHL3gs1() -// CHECK-TLS3-NEXT: [[TMP1:%.*]] = call %struct.S1* @llvm.threadlocal.address.p0s_struct.S1s(%struct.S1* @_ZL3gs1) 
+// CHECK-TLS3-NEXT: [[TMP1:%.*]] = call align 4 %struct.S1* @llvm.threadlocal.address.p0s_struct.S1s(%struct.S1* align 4 @_ZL3gs1) // CHECK-TLS3-NEXT: ret %struct.S1* [[TMP1]] // // @@ -4825,7 +4825,7 @@ int foobar() { // CHECK-TLS3-NEXT: call void @_ZTHN6Static1sE() // CHECK-TLS3-NEXT: br label [[TMP2]] // CHECK-TLS3: 2: -// CHECK-TLS3-NEXT: [[TMP3:%.*]] = call %struct.S3* @llvm.threadlocal.address.p0s_struct.S3s(%struct.S3* @_ZN6Static1sE) +// CHECK-TLS3-NEXT: [[TMP3:%.*]] = call align 4 %struct.S3* @llvm.threadlocal.address.p0s_struct.S3s(%struct.S3* align 4 @_ZN6Static1sE) // CHECK-TLS3-NEXT: ret %struct.S3* [[TMP3]] // // @@ -4836,21 +4836,21 @@ int foobar() { // CHECK-TLS3-NEXT: call void @_ZTH3gs3() // CHECK-TLS3-NEXT: br label [[TMP2]] // CHECK-TLS3: 2: -// CHECK-TLS3-NEXT: [[TMP3:%.*]] = call %struct.S5* @llvm.threadlocal.address.p0s_struct.S5s(%struct.S5* @gs3) +// CHECK-TLS3-NEXT: [[TMP3:%.*]] = call align 4 %struct.S5* @llvm.threadlocal.address.p0s_struct.S5s(%struct.S5* align 4 @gs3) // CHECK-TLS3-NEXT: ret %struct.S5* [[TMP3]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTW5arr_x // CHECK-TLS3-SAME: () #[[ATTR6]] comdat { // CHECK-TLS3-NEXT: call void @_ZTH5arr_x() -// CHECK-TLS3-NEXT: [[TMP1:%.*]] = call [2 x [3 x %struct.S1]]* @llvm.threadlocal.address.p0a2a3s_struct.S1s([2 x [3 x %struct.S1]]* @arr_x) +// CHECK-TLS3-NEXT: [[TMP1:%.*]] = call align 16 [2 x [3 x %struct.S1]]* @llvm.threadlocal.address.p0a2a3s_struct.S1s([2 x [3 x %struct.S1]]* align 16 @arr_x) // CHECK-TLS3-NEXT: ret [2 x [3 x %struct.S1]]* [[TMP1]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE // CHECK-TLS3-SAME: () #[[ATTR6]] comdat { // CHECK-TLS3-NEXT: call void @_ZTHN2STI2S4E2stE() -// CHECK-TLS3-NEXT: [[TMP1:%.*]] = call %struct.S4* @llvm.threadlocal.address.p0s_struct.S4s(%struct.S4* @_ZN2STI2S4E2stE) +// CHECK-TLS3-NEXT: [[TMP1:%.*]] = call align 4 %struct.S4* @llvm.threadlocal.address.p0s_struct.S4s(%struct.S4* align 4 @_ZN2STI2S4E2stE) // CHECK-TLS3-NEXT: 
ret %struct.S4* [[TMP1]] // // @@ -4915,12 +4915,12 @@ int foobar() { // CHECK-TLS3-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG281:![0-9]+]] // CHECK-TLS3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG281]] // CHECK-TLS3-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG281]] -// CHECK-TLS3-NEXT: [[TMP13:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN2STIiE2stE), !dbg [[DBG282:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP13:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN2STIiE2stE), !dbg [[DBG282:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4, !dbg [[DBG282]] // CHECK-TLS3-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG283:![0-9]+]] // CHECK-TLS3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG283]] // CHECK-TLS3-NEXT: store i32 [[ADD8]], i32* [[RES]], align 4, !dbg [[DBG283]] -// CHECK-TLS3-NEXT: [[TMP16:%.*]] = call float* @llvm.threadlocal.address.p0f32(float* @_ZN2STIfE2stE), !dbg [[DBG284:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP16:%.*]] = call align 4 float* @llvm.threadlocal.address.p0f32(float* align 4 @_ZN2STIfE2stE), !dbg [[DBG284:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP17:%.*]] = load float, float* [[TMP16]], align 4, !dbg [[DBG284]] // CHECK-TLS3-NEXT: [[CONV:%.*]] = fptosi float [[TMP17]] to i32, !dbg [[DBG284]] // CHECK-TLS3-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG285:![0-9]+]] @@ -5050,7 +5050,7 @@ int foobar() { // CHECK-TLS4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0, !dbg [[DBG124:![0-9]+]] // CHECK-TLS4-NEXT: [[TMP5:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG124]] // CHECK-TLS4-NEXT: store i32 [[TMP5]], i32* [[RES]], align 4, !dbg [[DBG125:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP6:%.*]] = call %struct.Smain* @llvm.threadlocal.address.p0s_struct.Smains(%struct.Smain* @_ZZ4mainE2sm), !dbg [[DBG126:![0-9]+]] +// CHECK-TLS4-NEXT: 
[[TMP6:%.*]] = call align 8 %struct.Smain* @llvm.threadlocal.address.p0s_struct.Smains(%struct.Smain* align 8 @_ZZ4mainE2sm), !dbg [[DBG126:![0-9]+]] // CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[TMP6]], i32 0, i32 0, !dbg [[DBG127:![0-9]+]] // CHECK-TLS4-NEXT: [[TMP7:%.*]] = load i32, i32* [[A2]], align 8, !dbg [[DBG127]] // CHECK-TLS4-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG128:![0-9]+]] @@ -5104,7 +5104,7 @@ int foobar() { // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWL3gs1 // CHECK-TLS4-SAME: () #[[ATTR2:[0-9]+]] { // CHECK-TLS4-NEXT: call void @_ZTHL3gs1() -// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call %struct.S1* @llvm.threadlocal.address.p0s_struct.S1s(%struct.S1* @_ZL3gs1) +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call align 4 %struct.S1* @llvm.threadlocal.address.p0s_struct.S1s(%struct.S1* align 4 @_ZL3gs1) // CHECK-TLS4-NEXT: ret %struct.S1* [[TMP1]] // // @@ -5141,7 +5141,7 @@ int foobar() { // CHECK-TLS4-NEXT: call void @_ZTHN6Static1sE() // CHECK-TLS4-NEXT: br label [[TMP2]] // CHECK-TLS4: 2: -// CHECK-TLS4-NEXT: [[TMP3:%.*]] = call %struct.S3* @llvm.threadlocal.address.p0s_struct.S3s(%struct.S3* @_ZN6Static1sE) +// CHECK-TLS4-NEXT: [[TMP3:%.*]] = call align 4 %struct.S3* @llvm.threadlocal.address.p0s_struct.S3s(%struct.S3* align 4 @_ZN6Static1sE) // CHECK-TLS4-NEXT: ret %struct.S3* [[TMP3]] // // @@ -5152,33 +5152,33 @@ int foobar() { // CHECK-TLS4-NEXT: call void @_ZTH3gs3() // CHECK-TLS4-NEXT: br label [[TMP2]] // CHECK-TLS4: 2: -// CHECK-TLS4-NEXT: [[TMP3:%.*]] = call %struct.S5* @llvm.threadlocal.address.p0s_struct.S5s(%struct.S5* @gs3) +// CHECK-TLS4-NEXT: [[TMP3:%.*]] = call align 4 %struct.S5* @llvm.threadlocal.address.p0s_struct.S5s(%struct.S5* align 4 @gs3) // CHECK-TLS4-NEXT: ret %struct.S5* [[TMP3]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTW5arr_x // CHECK-TLS4-SAME: () #[[ATTR2]] comdat { // CHECK-TLS4-NEXT: call void @_ZTH5arr_x() -// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call [2 x 
[3 x %struct.S1]]* @llvm.threadlocal.address.p0a2a3s_struct.S1s([2 x [3 x %struct.S1]]* @arr_x) +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call align 16 [2 x [3 x %struct.S1]]* @llvm.threadlocal.address.p0a2a3s_struct.S1s([2 x [3 x %struct.S1]]* align 16 @arr_x) // CHECK-TLS4-NEXT: ret [2 x [3 x %struct.S1]]* [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN2STIiE2stE // CHECK-TLS4-SAME: () #[[ATTR2]] comdat { -// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN2STIiE2stE) +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call align 4 i32* @llvm.threadlocal.address.p0i32(i32* align 4 @_ZN2STIiE2stE) // CHECK-TLS4-NEXT: ret i32* [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN2STIfE2stE // CHECK-TLS4-SAME: () #[[ATTR2]] comdat { -// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call float* @llvm.threadlocal.address.p0f32(float* @_ZN2STIfE2stE) +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call align 4 float* @llvm.threadlocal.address.p0f32(float* align 4 @_ZN2STIfE2stE) // CHECK-TLS4-NEXT: ret float* [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE // CHECK-TLS4-SAME: () #[[ATTR2]] comdat { // CHECK-TLS4-NEXT: call void @_ZTHN2STI2S4E2stE() -// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call %struct.S4* @llvm.threadlocal.address.p0s_struct.S4s(%struct.S4* @_ZN2STI2S4E2stE) +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call align 4 %struct.S4* @llvm.threadlocal.address.p0s_struct.S4s(%struct.S4* align 4 @_ZN2STI2S4E2stE) // CHECK-TLS4-NEXT: ret %struct.S4* [[TMP1]] // // diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 1dcf5fb0800d..47967b12179d 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -526,6 +526,14 @@ CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) { return CreateCall(TheFn, Ops); } +static MaybeAlign getAlign(Value *Ptr) { + if (auto *O = dyn_cast<GlobalObject>(Ptr)) + return O->getAlign(); + if (auto *A = dyn_cast<GlobalAlias>(Ptr)) + return A->getAliaseeObject()->getAlign(); + return {}; +} +
CallInst *IRBuilderBase::CreateThreadLocalAddress(Value *Ptr) { #ifndef NDEBUG // Handle specially for constexpr cast. This is possible when @@ -540,8 +548,13 @@ CallInst *IRBuilderBase::CreateThreadLocalAddress(Value *Ptr) { assert(isa<GlobalValue>(V) && cast<GlobalValue>(V)->isThreadLocal() && "threadlocal_address only applies to thread local variables."); #endif - return CreateIntrinsic(llvm::Intrinsic::threadlocal_address, {Ptr->getType()}, - {Ptr}); + CallInst *CI = CreateIntrinsic(llvm::Intrinsic::threadlocal_address, + {Ptr->getType()}, {Ptr}); + if (MaybeAlign A = getAlign(Ptr)) { + CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), *A)); + CI->addRetAttr(Attribute::getWithAlignment(CI->getContext(), *A)); + } + return CI; } CallInst *