[AMDGPU] Add GlobalDCE before internalization pass

The internalization pass only internalizes global variables
with no users. If the global variable has some dead user,
the internalization pass will not internalize it.

To be able to internalize global variables with dead
users, a global dce pass is needed before the
internalization pass.

This patch adds that.

Reviewed by: Artem Belevich, Matt Arsenault

Differential Revision: https://reviews.llvm.org/D98783
This commit is contained in:
Yaxun (Sam) Liu 2021-03-17 14:31:06 +00:00
parent ae2da68da6
commit 3597f02fd5
2 changed files with 56 additions and 0 deletions

View File

@ -0,0 +1,53 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
// RUN: -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
// RUN: -target-cpu gfx906 | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
// RUN: -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
// RUN: -target-cpu gfx906 | FileCheck -check-prefix=NEGCHK %s
#include "Inputs/cuda.h"
// AMDGPU internalizes unused global variables for whole-program compilation
// (-fno-gpu-rdc for each TU, or -fgpu-rdc for LTO), and they are then
// eliminated by global DCE. If a global variable has unused address space
// casts that are not visible in the source, these dead users must be removed
// by global DCE before internalization. This test makes sure unused global
// variables are eliminated.
// Check unused device/constant variables are eliminated.
// v1, v2 and v3 are never referenced by kern1 below.
// NEGCHK-NOT: @v1
__device__ int v1;
// NEGCHK-NOT: @v2
__constant__ int v2;
// NEGCHK-NOT: @_ZL2v3
constexpr int v3 = 1;
// Check managed variables are always kept.
// v4 is likewise unreferenced in this file, yet is expected in the output.
// CHECK-DAG: @v4
__managed__ int v4;
// Check used device/constant variables are not eliminated.
// u1 and u2 are both read by kern1 below.
// CHECK-DAG: @u1
__device__ int u1;
// CHECK-DAG: @u2
__constant__ int u2;
// Check u3 is kept because its address is taken.
// (kern1 passes u3 to fun1, whose parameter is a const reference.)
// CHECK-DAG: @_ZL2u3
constexpr int u3 = 2;
// Check u4 is not kept because it is not ODR-used.
// (kern1 only reads the value of u4 inside an arithmetic expression.)
// NEGCHK-NOT: @_ZL2u4
constexpr int u4 = 3;
// Declaration only; no definition of fun1 is provided in this file.
// It takes its argument by const reference.
__device__ int fun1(const int& x);
// Kernel that references u1..u4 so that they count as used in this file.
// u1, u2 and u4 are read by value, while u3 is bound to the const reference
// parameter of fun1. The resulting sum is stored through x.
__global__ void kern1(int *x) {
*x = u1 + u2 + fun1(u3) + u4;
}

View File

@ -575,6 +575,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
PM.addPass(AMDGPUPrintfRuntimeBindingPass());
if (InternalizeSymbols) {
// Global variables may have dead uses which need to be removed.
// Otherwise these useless global variables will not get internalized.
PM.addPass(GlobalDCEPass());
PM.addPass(InternalizePass(mustPreserveGV));
}
PM.addPass(AMDGPUPropagateAttributesLatePass(*this));