forked from OSchip/llvm-project
[AMDGPU] Add GlobalDCE before internalization pass
The internalization pass only internalizes global variables with no users. If a global variable has a dead user, the internalization pass will not internalize it. To be able to internalize global variables with dead users, a GlobalDCE pass is needed before the internalization pass. This patch adds it. Reviewed by: Artem Belevich, Matt Arsenault Differential Revision: https://reviews.llvm.org/D98783
This commit is contained in:
parent
ae2da68da6
commit
3597f02fd5
|
@ -0,0 +1,53 @@
|
|||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
|
||||
// RUN: -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
|
||||
// RUN: -target-cpu gfx906 | FileCheck %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
|
||||
// RUN: -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
|
||||
// RUN: -target-cpu gfx906 | FileCheck -check-prefix=NEGCHK %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// AMDGPU internalizes unused global variables for whole-program compilation
|
||||
// (-fno-gpu-rdc for each TU, or -fgpu-rdc for LTO), which are then
|
||||
// eliminated by global DCE. If there are invisible unused address space casts
|
||||
// for global variables, these dead users need to be eliminated by global
|
||||
// DCE before internalization. This test makes sure unused global variables
|
||||
// are eliminated.
|
||||
|
||||
// Check unused device/constant variables are eliminated.
|
||||
|
||||
// NEGCHK-NOT: @v1
|
||||
__device__ int v1;
|
||||
|
||||
// NEGCHK-NOT: @v2
|
||||
__constant__ int v2;
|
||||
|
||||
// NEGCHK-NOT: @_ZL2v3
|
||||
constexpr int v3 = 1;
|
||||
|
||||
// Check managed variables are always kept.
|
||||
|
||||
// CHECK-DAG: @v4
|
||||
__managed__ int v4;
|
||||
|
||||
// Check used device/constant variables are not eliminated.
|
||||
// CHECK-DAG: @u1
|
||||
__device__ int u1;
|
||||
|
||||
// CHECK-DAG: @u2
|
||||
__constant__ int u2;
|
||||
|
||||
// Check u3 is kept because its address is taken.
|
||||
// CHECK-DAG: @_ZL2u3
|
||||
constexpr int u3 = 2;
|
||||
|
||||
// Check u4 is not kept because it is not ODR-used.
|
||||
// NEGCHK-NOT: @_ZL2u4
|
||||
constexpr int u4 = 3;
|
||||
|
||||
__device__ int fun1(const int& x);
|
||||
|
||||
// Kernel that references the "used" globals of this test: it reads u1 and u2,
// passes u3 by const reference to fun1, reads u4 by value, and stores the sum
// through x.
__global__ void kern1(int *x) {
|
||||
*x = u1 + u2 + fun1(u3) + u4;
|
||||
}
|
|
@ -575,6 +575,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
|
|||
PM.addPass(AMDGPUPrintfRuntimeBindingPass());
|
||||
|
||||
if (InternalizeSymbols) {
|
||||
// Global variables may have dead uses which need to be removed.
|
||||
// Otherwise these useless global variables will not get internalized.
|
||||
PM.addPass(GlobalDCEPass());
|
||||
PM.addPass(InternalizePass(mustPreserveGV));
|
||||
}
|
||||
PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
|
||||
|
|
Loading…
Reference in New Issue