[OpenMP] Replace device kernel linkage with weak_odr

Currently the device kernels all have weak linkage to prevent linkage errors on multiple defintions. However, this prevents some optimizations from adequately analyzing them because of the nature of weak linkage. This patch replaces the weak linkage with weak_odr linkage so we can statically assert that multiple declarations of the same kernel will have the same definition. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D122443
2022-03-24 19:32:05 -04:00 · 2022-03-24 19:32:05 -04:00 · b9f67d44ba
parent 5975f1c5f9
commit b9f67d44ba
10 changed files with 30 additions and 30 deletions
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@ -6537,7 +6537,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
-    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
+    OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
--- a/clang/test/OpenMP/amdgcn_target_codegen.cpp
+++ b/clang/test/OpenMP/amdgcn_target_codegen.cpp
@ -9,7 +9,7 @@
 #define N 1000

 int test_amdgcn_target_tid_threads() {
-// CHECK-LABEL: define weak amdgpu_kernel void @{{.*}}test_amdgcn_target_tid_threads
+// CHECK-LABEL: define weak_odr amdgpu_kernel void @{{.*}}test_amdgcn_target_tid_threads

  int arr[N];

@ -23,7 +23,7 @@ int test_amdgcn_target_tid_threads() {
 }

 int test_amdgcn_target_tid_threads_simd() {
-// CHECK-LABEL: define weak amdgpu_kernel void @{{.*}}test_amdgcn_target_tid_threads_simd
+// CHECK-LABEL: define weak_odr amdgpu_kernel void @{{.*}}test_amdgcn_target_tid_threads_simd

  int arr[N];

--- a/clang/test/OpenMP/assumes_include_nvptx.cpp
+++ b/clang/test/OpenMP/assumes_include_nvptx.cpp
@ -11,11 +11,11 @@

 // TODO: Think about teaching the OMPIRBuilder about default attributes as well so the __kmpc* declarations are annotated.

-// CHECK: define weak void @__omp_offloading_{{.*}}__Z17complex_reductionIfEvv_{{.*}}() [[attr0:#[0-9]]]
+// CHECK: define weak_odr void @__omp_offloading_{{.*}}__Z17complex_reductionIfEvv_{{.*}}() [[attr0:#[0-9]]]
 // CHECK: call i32 @__kmpc_target_init(
 // CHECK: declare noundef float @_Z3sinf(float noundef) [[attr1:#[0-9]*]]
 // CHECK: declare void @__kmpc_target_deinit(
-// CHECK: define weak void @__omp_offloading_{{.*}}__Z17complex_reductionIdEvv_{{.*}}() [[attr0]]
+// CHECK: define weak_odr void @__omp_offloading_{{.*}}__Z17complex_reductionIdEvv_{{.*}}() [[attr0]]
 // CHECK: %call = call noundef double @_Z3sind(double noundef 0.000000e+00) [[attr2:#[0-9]]]
 // CHECK: declare noundef double @_Z3sind(double noundef) [[attr1]]

--- a/clang/test/OpenMP/declare_target_codegen.cpp
+++ b/clang/test/OpenMP/declare_target_codegen.cpp
@ -140,7 +140,7 @@ int bar() { return 1 + foo() + bar() + baz1() + baz2(); }
 int maini1() {
  int a;
  static long aa = 32 + bbb + ccc + fff + ggg;
-// CHECK-DAG: define weak void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+1]](i32* noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %{{.*}}, i64 {{.*}}, i64 {{.*}})
+// CHECK-DAG: define weak_odr void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+1]](i32* noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %{{.*}}, i64 {{.*}}, i64 {{.*}})
 #pragma omp target map(tofrom \
                       : a, b)
  {
@ -153,7 +153,7 @@ int maini1() {

 int baz3() { return 2 + baz2(); }
 int baz2() {
-// CHECK-DAG: define weak void @__omp_offloading_{{.*}}baz2{{.*}}_l[[@LINE+1]](i64 {{.*}})
+// CHECK-DAG: define weak_odr void @__omp_offloading_{{.*}}baz2{{.*}}_l[[@LINE+1]](i64 {{.*}})
 #pragma omp target parallel
  ++c;
  return 2 + baz3();
@ -165,7 +165,7 @@ static __typeof(create) __t_create __attribute__((__weakref__("__create")));

 int baz5() {
  bool a;
-// CHECK-DAG: define weak void @__omp_offloading_{{.*}}baz5{{.*}}_l[[@LINE+1]](i64 {{.*}})
+// CHECK-DAG: define weak_odr void @__omp_offloading_{{.*}}baz5{{.*}}_l[[@LINE+1]](i64 {{.*}})
 #pragma omp target
  a = __extension__(void *) & __t_create != 0;
  return a;
--- a/clang/test/OpenMP/declare_target_link_codegen.cpp
+++ b/clang/test/OpenMP/declare_target_link_codegen.cpp
@ -50,7 +50,7 @@ int maini1() {
  return 0;
 }

-// DEVICE: define weak void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l42(i32* noundef nonnull align {{[0-9]+}} dereferenceable{{[^,]*}}
+// DEVICE: define weak_odr void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l42(i32* noundef nonnull align {{[0-9]+}} dereferenceable{{[^,]*}}
 // DEVICE: [[C_REF:%.+]] = load i32*, i32** @c_decl_tgt_ref_ptr,
 // DEVICE: [[C:%.+]] = load i32, i32* [[C_REF]],
 // DEVICE: store i32 [[C]], i32* %
--- a/clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp
+++ b/clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp
@ -22,7 +22,7 @@ int amdgcn_device_isa_selected() {
  return threadCount;
 }

-// CHECK: define weak amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_selected
+// CHECK: define weak_odr amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_selected
 // CHECK: user_code.entry:
 // CHECK: call void @__kmpc_parallel_51
 // CHECK-NOT: call i32 @__kmpc_single
@ -44,7 +44,7 @@ int amdgcn_device_isa_not_selected() {

  return threadCount;
 }
-// CHECK: define weak amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_not_selected
+// CHECK: define weak_odr amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_not_selected
 // CHECK: user_code.entry:
 // CHECK: call i32 @__kmpc_single
 // CHECK-NOT: call void @__kmpc_parallel_51
--- a/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp
@ -95,7 +95,7 @@ int foo(int n, double *ptr) {
    ptr[0]++;
  }

-  // TCHECK:  define weak void @__omp_offloading_{{.+}}(double* noundef [[PTR_IN:%.+]])
+  // TCHECK:  define weak_odr void @__omp_offloading_{{.+}}(double* noundef [[PTR_IN:%.+]])
  // TCHECK:  [[PTR_ADDR:%.+]] = alloca double*,
  // TCHECK-NOT: alloca double*,
  // TCHECK:  store double* [[PTR_IN]], double** [[PTR_ADDR]],
--- a/clang/test/OpenMP/target_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_firstprivate_codegen.cpp
@ -143,7 +143,7 @@ int foo(int n, double *ptr) {
  // CHECK:  [[PTR_GEP_ARG:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
  // CHECK:  {{.+}} = call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, {{.+}}, i32 3, i8** [[BASE_PTR_GEP_ARG]], i8** [[PTR_GEP_ARG]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** null, i8** null)

-  // TCHECK:  define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], i32** noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[P_IN:%.+]], i{{[0-9]+}} noundef [[GA_IN:%.+]])
+  // TCHECK:  define weak_odr void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], i32** noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[P_IN:%.+]], i{{[0-9]+}} noundef [[GA_IN:%.+]])
  // TCHECK:  [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
  // TCHECK:  [[P_ADDR:%.+]] = alloca i32**,
  // TCHECK:  [[GA_ADDR:%.+]] = alloca i{{64|32}},
@ -352,7 +352,7 @@ int foo(int n, double *ptr) {
  // CHECK:  [[PTR_GEP_ARG3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
  // CHECK: {{.+}} = call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, {{.+}}, i32 2, i8** [[BASE_PTR_GEP_ARG3]], i8** [[PTR_GEP_ARG3]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i8** null, i8** null)

-  // TCHECK:  define weak void @__omp_offloading_{{.+}}(double* noundef [[PTR_IN:%.+]], [[TTII]]* noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[E:%.+]])
+  // TCHECK:  define weak_odr void @__omp_offloading_{{.+}}(double* noundef [[PTR_IN:%.+]], [[TTII]]* noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[E:%.+]])
  // TCHECK:  [[PTR_ADDR:%.+]] = alloca double*,
  // TCHECK-NOT: alloca [[TTII]],
  // TCHECK-NOT: alloca double*,
@ -391,7 +391,7 @@ static int fstatic(int n) {
  return a;
 }

-// TCHECK: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], i{{[0-9]+}} noundef [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
+// TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], i{{[0-9]+}} noundef [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
 // TCHECK:  [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
 // TCHECK:  [[A3_ADDR:%.+]] = alloca i{{[0-9]+}},
 // TCHECK:  [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*,
@ -479,7 +479,7 @@ struct S1 {
  // only check that we use the map types stored in the global variable
  // CHECK:  call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, {{.+}}, i32 5, i8** {{.+}}, i8** {{.+}}, i{{[0-9]+}}* {{.+}}, i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT4]], i32 0, i32 0), i8** null, i8** null)

-  // TCHECK: define weak void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i{{[0-9]+}} noundef [[B_IN:%.+]], i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i{{[0-9]+}}{{.+}} [[C_IN:%.+]])
+  // TCHECK: define weak_odr void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i{{[0-9]+}} noundef [[B_IN:%.+]], i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i{{[0-9]+}}{{.+}} [[C_IN:%.+]])
  // TCHECK:  [[TH_ADDR:%.+]] = alloca [[S1]]*,
  // TCHECK:  [[B_ADDR:%.+]] = alloca i{{[0-9]+}},
  // TCHECK:  [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
@ -587,7 +587,7 @@ int bar(int n, double *ptr) {

 // CHECK:  call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, {{.+}}, i32 2, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT6]], i32 0, i32 0), i8** null, i8** null)

-// TCHECK: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
+// TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
 // TCHECK:  [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
 // TCHECK:  [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*,
 // TCHECK-NOT: alloca i{{[0-9]+}},
--- a/clang/test/OpenMP/target_private_codegen.cpp
+++ b/clang/test/OpenMP/target_private_codegen.cpp
@ -45,7 +45,7 @@ int foo(int n) {
  {
  }

-  // TCHECK:  define weak void @__omp_offloading_{{.+}}()
+  // TCHECK:  define weak_odr void @__omp_offloading_{{.+}}()
  // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}},
  // TCHECK-NOT: store {{.+}}, {{.+}} [[A]],
  // TCHECK:  ret void
@ -55,7 +55,7 @@ int foo(int n) {
    a = 1;
  }

-  // TCHECK:  define weak void @__omp_offloading_{{.+}}()
+  // TCHECK:  define weak_odr void @__omp_offloading_{{.+}}()
  // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}},
  // TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A]],
  // TCHECK:  ret void
@ -66,7 +66,7 @@ int foo(int n) {
    aa = 1;
  }

-  // TCHECK:  define weak void @__omp_offloading_{{.+}}()
+  // TCHECK:  define weak_odr void @__omp_offloading_{{.+}}()
  // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}},
  // TCHECK:  [[A2:%.+]] = alloca i{{[0-9]+}},
  // TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A]],
@ -85,7 +85,7 @@ int foo(int n) {
  }
  // make sure that private variables are generated in all cases and that we use those instances for operations inside the
  // target region
-  // TCHECK:  define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i{{[0-9]+}} noundef [[VLA3:%.+]])
+  // TCHECK:  define weak_odr void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i{{[0-9]+}} noundef [[VLA3:%.+]])
  // TCHECK:  [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
  // TCHECK:  [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}},
  // TCHECK:  [[VLA_ADDR4:%.+]] = alloca i{{[0-9]+}},
@ -179,7 +179,7 @@ int fstatic(int n) {
  return a;
 }

-// TCHECK: define weak void @__omp_offloading_{{.+}}()
+// TCHECK: define weak_odr void @__omp_offloading_{{.+}}()
 // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}},
 // TCHECK:  [[A2:%.+]] = alloca i{{[0-9]+}},
 // TCHECK:  [[A3:%.+]] = alloca i{{[0-9]+}},
@ -207,7 +207,7 @@ struct S1 {
    return c[1][1] + (int)b;
  }

-  // TCHECK: define weak void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]])
+  // TCHECK: define weak_odr void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]])
  // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*,
  // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
  // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}},
@ -261,7 +261,7 @@ int bar(int n){
 }

 // template
-// TCHECK: define weak void @__omp_offloading_{{.+}}()
+// TCHECK: define weak_odr void @__omp_offloading_{{.+}}()
 // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}},
 // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}},
 // TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}],
--- a/clang/test/OpenMP/target_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_reduction_codegen.cpp
@ -45,7 +45,7 @@ int foo(int n) {
  {
  }

-  // TCHECK: define weak void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}})
+  // TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}})
  // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
  // TCHECK: store {{.+}}, {{.+}} [[A]],
  // TCHECK: load i32*, i32** [[A]],
@ -56,7 +56,7 @@ int foo(int n) {
    a = 1;
  }

-  // TCHECK:  define weak void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}})
+  // TCHECK:  define weak_odr void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}})
  // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
  // TCHECK: store {{.+}}, {{.+}} [[A]],
  // TCHECK: [[REF:%.+]] = load i32*, i32** [[A]],
@ -69,7 +69,7 @@ int foo(int n) {
    aa = 1;
  }

-  // TCHECK:  define weak void @__omp_offloading_{{.+}}(i32*{{.+}} [[A:%.+]], i16*{{.+}} [[AA:%.+]])
+  // TCHECK:  define weak_odr void @__omp_offloading_{{.+}}(i32*{{.+}} [[A:%.+]], i16*{{.+}} [[AA:%.+]])
  // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}}*,
  // TCHECK:  [[AA:%.+]] = alloca i{{[0-9]+}}*,
  // TCHECK: store {{.+}}, {{.+}} [[A]],
@ -118,7 +118,7 @@ int fstatic(int n) {
  return a;
 }

-// TCHECK: define weak void @__omp_offloading_{{.+}}(i32*{{.+}}, i16*{{.+}}, i8*{{.+}}, [10 x i32]*{{.+}})
+// TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i32*{{.+}}, i16*{{.+}}, i8*{{.+}}, [10 x i32]*{{.+}})
 // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}}*,
 // TCHECK:  [[A2:%.+]] = alloca i{{[0-9]+}}*,
 // TCHECK:  [[A3:%.+]] = alloca i{{[0-9]+}}*,
@ -154,7 +154,7 @@ struct S1 {
    return c[1][1] + (int)b;
  }

-  // TCHECK: define weak void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i32*{{.+}}, i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i16*{{.+}})
+  // TCHECK: define weak_odr void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i32*{{.+}}, i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i16*{{.+}})
  // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*,
  // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}*,
  // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
@ -206,7 +206,7 @@ int bar(int n){
 }

 // template
-// TCHECK: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, [10 x i32]*{{.+}})
+// TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, [10 x i32]*{{.+}})
 // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
 // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}*,
 // TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*,