[HIP] Enable -amdgpu-internalize-symbols

Enable -amdgpu-internalize-symbols to eliminate unused functions and global variables
for the whole program, which speeds up compilation and improves performance.

For -fno-gpu-rdc, -amdgpu-internalize-symbols is passed to clang -cc1.

For -fgpu-rdc, -amdgpu-internalize-symbols is passed to lld.

Differential Revision: https://reviews.llvm.org/D81959
This commit is contained in:
Yaxun (Sam) Liu 2020-06-16 14:52:03 -04:00
parent afd43a7a78
commit c830d517b4
6 changed files with 40 additions and 14 deletions

View File

@ -57,8 +57,14 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
const llvm::opt::ArgList &Args) const {
// Construct lld command.
// The output from ld.lld is an HSA code object file.
ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined",
"-shared", "-o", Output.getFilename()};
ArgStringList LldArgs{"-flavor",
"gnu",
"--no-undefined",
"-shared",
"-mllvm",
"-amdgpu-internalize-symbols",
"-o",
Output.getFilename()};
for (auto Input : Inputs)
LldArgs.push_back(Input.getFilename());
const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
@ -143,6 +149,8 @@ void HIPToolChain::addClangTargetOptions(
if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false))
CC1Args.push_back("-fgpu-rdc");
else
CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
StringRef MaxThreadsPerBlock =
DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);

View File

@ -23,8 +23,10 @@
// CHECK-NOT: llvm-link
// CHECK-NOT: opt
// CHECK-NOT: llc
// CHECK: "{{.*lld.*}}" {{.*}} "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" "obj1-hip-amdgcn-amd-amdhsa-gfx900.o" "obj2-hip-amdgcn-amd-amdhsa-gfx900.o"
// CHECK: "{{.*lld.*}}" {{.*}} "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx906" "obj1-hip-amdgcn-amd-amdhsa-gfx906.o" "obj2-hip-amdgcn-amd-amdhsa-gfx906.o"
// CHECK: "{{.*lld.*}}" {{.*}} "-mllvm" "-amdgpu-internalize-symbols"
// CHECK-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" "obj1-hip-amdgcn-amd-amdhsa-gfx900.o" "obj2-hip-amdgcn-amd-amdhsa-gfx900.o"
// CHECK: "{{.*lld.*}}" {{.*}} "-mllvm" "-amdgpu-internalize-symbols"
// CHECK-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx906" "obj1-hip-amdgcn-amd-amdhsa-gfx906.o" "obj2-hip-amdgcn-amd-amdhsa-gfx906.o"
// OUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=executable.hipfb"
// OUT: "{{.*ld.*}}" {{.*}} "-o" "executable" {{.*}} "-T" "executable.lk"
// NOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=a.out.hipfb"

View File

@ -35,14 +35,22 @@
// CHECK: {{".*clang.*"}} "-cc1" {{.*}} "-E" {{.*}} [[CPU:"-target-cpu" "gfx900"]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.cui"
// NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc"
// RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.bc"
// NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-S" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.s"
// NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-S"
// NORDC-SAME: "-mllvm" "-amdgpu-internalize-symbols"
// NORDC-SAME: [[CPU]]
// NORDC-SAME: "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.s"
// RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc"
// NORDC: {{".*clang.*"}} "-cc1as" {{.*}} "-filetype" "obj" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.o"
// CHECK-NOT: llvm-link
// CHECK-NOT: opt
// CHECK-NOT: llc
// NORDC: {{.*lld.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.out"
// RDCL: "{{.*lld.*}}" {{.*}} "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900"
// RDCL: "{{.*lld.*}}" {{.*}} "-mllvm" "-amdgpu-internalize-symbols"
// RDCL-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900"
// NORDC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.hip-hip-amdgcn-amd-amdhsa.hipfb"
// CHECK: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui"
// NORDC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-fcuda-include-gpubinary" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc"

View File

@ -38,7 +38,8 @@
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
// CHECK-SAME: "-fcuda-is-device" "-mllvm" "-amdgpu-internalize-symbols"
// CHECK-SAME: "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
// CHECK-SAME: "-fapply-global-visibility-to-externs"
// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
// CHECK-SAME: "-target-cpu" "gfx803"
@ -60,7 +61,8 @@
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
// CHECK-SAME: "-fcuda-is-device" "-mllvm" "-amdgpu-internalize-symbols"
// CHECK-SAME: "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
// CHECK-SAME: "-fapply-global-visibility-to-externs"
// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
// CHECK-SAME: "-target-cpu" "gfx900"
@ -98,7 +100,8 @@
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
// CHECK-SAME: "-fcuda-is-device" "-mllvm" "-amdgpu-internalize-symbols"
// CHECK-SAME: "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
// CHECK-SAME: "-fapply-global-visibility-to-externs"
// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
// CHECK-SAME: "-target-cpu" "gfx803"
@ -120,7 +123,8 @@
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
// CHECK-SAME: "-fcuda-is-device" "-mllvm" "-amdgpu-internalize-symbols"
// CHECK-SAME: "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
// CHECK-SAME: "-fapply-global-visibility-to-externs"
// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
// CHECK-SAME: "-target-cpu" "gfx900"

View File

@ -97,12 +97,14 @@
// LINK-NOT: "*.llvm-link"
// LINK-NOT: ".*opt"
// LINK-NOT: ".*llc"
// LINK: {{".*lld.*"}} {{.*}} "-o" "[[IMG_DEV1:.*.out]]" "[[A_BC1]]" "[[B_BC1]]"
// LINK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols"
// LINK-SAME: "-o" "[[IMG_DEV1:.*.out]]" "[[A_BC1]]" "[[B_BC1]]"
// LINK-NOT: "*.llvm-link"
// LINK-NOT: ".*opt"
// LINK-NOT: ".*llc"
// LINK: {{".*lld.*"}} {{.*}} "-o" "[[IMG_DEV2:.*.out]]" "[[A_BC2]]" "[[B_BC2]]"
// LINK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols"
// LINK-SAME: "-o" "[[IMG_DEV2:.*.out]]" "[[A_BC2]]" "[[B_BC2]]"
// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
// LINK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"

View File

@ -37,7 +37,8 @@
// CHECK-NOT: "*.llvm-link"
// CHECK-NOT: ".*opt"
// CHECK-NOT: ".*llc"
// CHECK: {{".*lld.*"}} {{.*}} "-o" "[[IMG_DEV1:.*.out]]" [[A_BC1]] [[B_BC1]]
// CHECK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols"
// CHECK-SAME: "-o" "[[IMG_DEV1:.*.out]]" [[A_BC1]] [[B_BC1]]
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
@ -62,7 +63,8 @@
// CHECK-NOT: "*.llvm-link"
// CHECK-NOT: ".*opt"
// CHECK-NOT: ".*llc"
// CHECK: {{".*lld.*"}} {{.*}} "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]]
// CHECK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols"
// CHECK-SAME: "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]]
// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"