From c830d517b4e46abb617a80b3967b26546370c928 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 16 Jun 2020 14:52:03 -0400 Subject: [PATCH] [HIP] Enable -amdgpu-internalize-symbols Enable -amdgpu-internalize-symbols to eliminate unused functions and global variables for whole program to speed up compilation and improve performance. For -fno-gpu-rdc, -amdgpu-internalize-symbols is passed to clang -cc1. For -fgpu-rdc, -amdgpu-internalize-symbols is passed to lld. Differential Revision: https://reviews.llvm.org/D81959 --- clang/lib/Driver/ToolChains/HIP.cpp | 12 ++++++++++-- clang/test/Driver/hip-link-save-temps.hip | 6 ++++-- clang/test/Driver/hip-save-temps.hip | 12 ++++++++++-- clang/test/Driver/hip-toolchain-no-rdc.hip | 12 ++++++++---- clang/test/Driver/hip-toolchain-rdc-separate.hip | 6 ++++-- clang/test/Driver/hip-toolchain-rdc.hip | 6 ++++-- 6 files changed, 40 insertions(+), 14 deletions(-) diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index b76ed7288000..b9143f98f152 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -57,8 +57,14 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, const llvm::opt::ArgList &Args) const { // Construct lld command. // The output from ld.lld is an HSA code object file. - ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", - "-shared", "-o", Output.getFilename()}; + ArgStringList LldArgs{"-flavor", + "gnu", + "--no-undefined", + "-shared", + "-mllvm", + "-amdgpu-internalize-symbols", + "-o", + Output.getFilename()}; for (auto Input : Inputs) LldArgs.push_back(Input.getFilename()); const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); @@ -143,6 +149,8 @@ void HIPToolChain::addClangTargetOptions( if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) CC1Args.push_back("-fgpu-rdc"); + else + CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"}); StringRef MaxThreadsPerBlock = DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ); diff --git a/clang/test/Driver/hip-link-save-temps.hip b/clang/test/Driver/hip-link-save-temps.hip index 080cafc5e417..304be921ebc6 100644 --- a/clang/test/Driver/hip-link-save-temps.hip +++ b/clang/test/Driver/hip-link-save-temps.hip @@ -23,8 +23,10 @@ // CHECK-NOT: llvm-link // CHECK-NOT: opt // CHECK-NOT: llc -// CHECK: "{{.*lld.*}}" {{.*}} "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" "obj1-hip-amdgcn-amd-amdhsa-gfx900.o" "obj2-hip-amdgcn-amd-amdhsa-gfx900.o" -// CHECK: "{{.*lld.*}}" {{.*}} "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx906" "obj1-hip-amdgcn-amd-amdhsa-gfx906.o" "obj2-hip-amdgcn-amd-amdhsa-gfx906.o" +// CHECK: "{{.*lld.*}}" {{.*}} "-mllvm" "-amdgpu-internalize-symbols" +// CHECK-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" "obj1-hip-amdgcn-amd-amdhsa-gfx900.o" "obj2-hip-amdgcn-amd-amdhsa-gfx900.o" +// CHECK: "{{.*lld.*}}" {{.*}} "-mllvm" "-amdgpu-internalize-symbols" +// CHECK-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx906" "obj1-hip-amdgcn-amd-amdhsa-gfx906.o" "obj2-hip-amdgcn-amd-amdhsa-gfx906.o" // OUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=executable.hipfb" // OUT: "{{.*ld.*}}" {{.*}} "-o" "executable" {{.*}} "-T" "executable.lk" // NOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=a.out.hipfb" diff --git a/clang/test/Driver/hip-save-temps.hip b/clang/test/Driver/hip-save-temps.hip index a44ee0d23307..1ac506aa6fba 100644 --- a/clang/test/Driver/hip-save-temps.hip +++ b/clang/test/Driver/hip-save-temps.hip @@ -35,14 +35,22 @@ // CHECK: {{".*clang.*"}} "-cc1" {{.*}} "-E" {{.*}} [[CPU:"-target-cpu" "gfx900"]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.cui" // NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc" // RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.bc" -// NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-S" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.s" + +// NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-S" +// NORDC-SAME: "-mllvm" "-amdgpu-internalize-symbols" +// NORDC-SAME: [[CPU]] +// NORDC-SAME: "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.s" + // RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc" // NORDC: {{".*clang.*"}} "-cc1as" {{.*}} "-filetype" "obj" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.o" // CHECK-NOT: llvm-link // CHECK-NOT: opt // CHECK-NOT: llc // NORDC: {{.*lld.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.out" -// RDCL: "{{.*lld.*}}" {{.*}} "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" + +// RDCL: "{{.*lld.*}}" {{.*}} "-mllvm" "-amdgpu-internalize-symbols" +// RDCL-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" + // NORDC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.hip-hip-amdgcn-amd-amdhsa.hipfb" // CHECK: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui" // NORDC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-fcuda-include-gpubinary" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip index 937da40049e7..c03c78a97c94 100644 --- a/clang/test/Driver/hip-toolchain-no-rdc.hip +++ b/clang/test/Driver/hip-toolchain-no-rdc.hip @@ -38,7 +38,8 @@ // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" -// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" +// CHECK-SAME: "-fcuda-is-device" "-mllvm" "-amdgpu-internalize-symbols" +// CHECK-SAME: "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" // CHECK-SAME: "-target-cpu" "gfx803" @@ -60,7 +61,8 @@ // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" -// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" +// CHECK-SAME: "-fcuda-is-device" "-mllvm" "-amdgpu-internalize-symbols" +// CHECK-SAME: "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" // CHECK-SAME: "-target-cpu" "gfx900" @@ -98,7 +100,8 @@ // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" -// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" +// CHECK-SAME: "-fcuda-is-device" "-mllvm" "-amdgpu-internalize-symbols" +// CHECK-SAME: "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" // CHECK-SAME: "-target-cpu" "gfx803" @@ -120,7 +123,8 @@ // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" -// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" +// CHECK-SAME: "-fcuda-is-device" "-mllvm" "-amdgpu-internalize-symbols" +// CHECK-SAME: "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" // CHECK-SAME: "-target-cpu" "gfx900" diff --git a/clang/test/Driver/hip-toolchain-rdc-separate.hip b/clang/test/Driver/hip-toolchain-rdc-separate.hip index c02e43d613a7..25661f399a82 100644 --- a/clang/test/Driver/hip-toolchain-rdc-separate.hip +++ b/clang/test/Driver/hip-toolchain-rdc-separate.hip @@ -97,12 +97,14 @@ // LINK-NOT: "*.llvm-link" // LINK-NOT: ".*opt" // LINK-NOT: ".*llc" -// LINK: {{".*lld.*"}} {{.*}} "-o" "[[IMG_DEV1:.*.out]]" "[[A_BC1]]" "[[B_BC1]]" +// LINK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols" +// LINK-SAME: "-o" "[[IMG_DEV1:.*.out]]" "[[A_BC1]]" "[[B_BC1]]" // LINK-NOT: "*.llvm-link" // LINK-NOT: ".*opt" // LINK-NOT: ".*llc" -// LINK: {{".*lld.*"}} {{.*}} "-o" "[[IMG_DEV2:.*.out]]" "[[A_BC2]]" "[[B_BC2]]" +// LINK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols" +// LINK-SAME: "-o" "[[IMG_DEV2:.*.out]]" "[[A_BC2]]" "[[B_BC2]]" // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" diff --git a/clang/test/Driver/hip-toolchain-rdc.hip b/clang/test/Driver/hip-toolchain-rdc.hip index 091a6c7ac83c..f520236abcb9 100644 --- a/clang/test/Driver/hip-toolchain-rdc.hip +++ b/clang/test/Driver/hip-toolchain-rdc.hip @@ -37,7 +37,8 @@ // CHECK-NOT: "*.llvm-link" // CHECK-NOT: ".*opt" // CHECK-NOT: ".*llc" -// CHECK: {{".*lld.*"}} {{.*}} "-o" "[[IMG_DEV1:.*.out]]" [[A_BC1]] [[B_BC1]] +// CHECK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols" +// CHECK-SAME: "-o" "[[IMG_DEV1:.*.out]]" [[A_BC1]] [[B_BC1]] // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" @@ -62,7 +63,8 @@ // CHECK-NOT: "*.llvm-link" // CHECK-NOT: ".*opt" // CHECK-NOT: ".*llc" -// CHECK: {{".*lld.*"}} {{.*}} "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]] +// CHECK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols" +// CHECK-SAME: "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]] // CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"