forked from OSchip/llvm-project
Revert "[HIP] [AlwaysInliner] Disable AlwaysInliner to eliminate undefined symbols"
This reverts commit 03375a3fb3.
This commit is contained in:
parent
4594f81165
commit
1830ec94ac
|
@ -5089,9 +5089,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable -mconstructor-aliases except on darwin, where we have to work around
|
// Enable -mconstructor-aliases except on darwin, where we have to work around
|
||||||
// a linker bug (see <rdar://problem/7651567>), and CUDA device code, where
|
// a linker bug (see <rdar://problem/7651567>), and CUDA/AMDGPU device code,
|
||||||
// aliases aren't supported.
|
// where aliases aren't supported.
|
||||||
if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX())
|
if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() && !RawTriple.isAMDGPU())
|
||||||
CmdArgs.push_back("-mconstructor-aliases");
|
CmdArgs.push_back("-mconstructor-aliases");
|
||||||
|
|
||||||
// Darwin's kernel doesn't support guard variables; just die if we
|
// Darwin's kernel doesn't support guard variables; just die if we
|
||||||
|
|
|
@ -1,17 +0,0 @@
|
||||||
// REQUIRES: amdgpu-registered-target, clang-driver
|
|
||||||
|
|
||||||
// RUN: %clang --offload-arch=gfx906 --cuda-device-only -nogpulib -nogpuinc -x hip -emit-llvm -S -o - %s \
|
|
||||||
// RUN: -fgpu-rdc -O3 -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false | \
|
|
||||||
// RUN: FileCheck %s
|
|
||||||
|
|
||||||
#include "Inputs/cuda.h"
|
|
||||||
|
|
||||||
// CHECK: %struct.B = type { i8 }
|
|
||||||
struct B {
|
|
||||||
|
|
||||||
// CHECK: @_ZN1BC1Ei = hidden unnamed_addr alias void (%struct.B*, i32), void (%struct.B*, i32)* @_ZN1BC2Ei
|
|
||||||
__device__ B(int x);
|
|
||||||
};
|
|
||||||
|
|
||||||
__device__ B::B(int x) {
|
|
||||||
}
|
|
|
@ -15,7 +15,6 @@
|
||||||
#include "AMDGPU.h"
|
#include "AMDGPU.h"
|
||||||
#include "AMDGPUTargetMachine.h"
|
#include "AMDGPUTargetMachine.h"
|
||||||
#include "Utils/AMDGPUBaseInfo.h"
|
#include "Utils/AMDGPUBaseInfo.h"
|
||||||
#include "llvm/CodeGen/CommandFlags.h"
|
|
||||||
#include "llvm/IR/Module.h"
|
#include "llvm/IR/Module.h"
|
||||||
#include "llvm/Pass.h"
|
#include "llvm/Pass.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
|
@ -91,13 +90,9 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
|
||||||
|
|
||||||
SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
|
SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
|
||||||
SmallPtrSet<Function *, 8> FuncsToNoInline;
|
SmallPtrSet<Function *, 8> FuncsToNoInline;
|
||||||
Triple TT(M.getTargetTriple());
|
|
||||||
|
|
||||||
for (GlobalAlias &A : M.aliases()) {
|
for (GlobalAlias &A : M.aliases()) {
|
||||||
if (Function* F = dyn_cast<Function>(A.getAliasee())) {
|
if (Function* F = dyn_cast<Function>(A.getAliasee())) {
|
||||||
if (TT.getArch() == Triple::amdgcn &&
|
|
||||||
A.getLinkage() != GlobalValue::InternalLinkage)
|
|
||||||
continue;
|
|
||||||
A.replaceAllUsesWith(F);
|
A.replaceAllUsesWith(F);
|
||||||
AliasesToRemove.push_back(&A);
|
AliasesToRemove.push_back(&A);
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,8 +29,6 @@
|
||||||
#include "SIMachineFunctionInfo.h"
|
#include "SIMachineFunctionInfo.h"
|
||||||
#include "llvm/Analysis/CallGraph.h"
|
#include "llvm/Analysis/CallGraph.h"
|
||||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||||
#include "llvm/IR/GlobalAlias.h"
|
|
||||||
#include "llvm/IR/GlobalValue.h"
|
|
||||||
#include "llvm/Target/TargetMachine.h"
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
@ -63,8 +61,7 @@ static const Function *getCalleeFunction(const MachineOperand &Op) {
|
||||||
assert(Op.getImm() == 0);
|
assert(Op.getImm() == 0);
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
|
|
||||||
return cast<Function>(GA->getOperand(0));
|
|
||||||
return cast<Function>(Op.getGlobal());
|
return cast<Function>(Op.getGlobal());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
; RUN: llc -mtriple amdgcn-unknown-linux-gnu -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
|
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
|
||||||
; RUN: llc -mtriple amdgcn-unknown-linux-gnu -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
|
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
|
||||||
; RUN: llc -mtriple r600-unknown-linux-gnu -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s --check-prefix=R600
|
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
|
||||||
|
|
||||||
; ALL-NOT: {{^}}func:
|
; ALL-NOT: {{^}}func:
|
||||||
define internal i32 @func(i32 %a) {
|
define internal i32 @func(i32 %a) {
|
||||||
|
@ -9,7 +9,7 @@ entry:
|
||||||
ret i32 %tmp0
|
ret i32 %tmp0
|
||||||
}
|
}
|
||||||
|
|
||||||
; CHECK: {{^}}kernel:
|
; ALL: {{^}}kernel:
|
||||||
; GCN-NOT: s_swappc_b64
|
; GCN-NOT: s_swappc_b64
|
||||||
define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) {
|
define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) {
|
||||||
entry:
|
entry:
|
||||||
|
@ -18,13 +18,12 @@ entry:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; CHECK: func_alias
|
; CHECK-NOT: func_alias
|
||||||
; R600-NOT: func_alias
|
; ALL-NOT: func_alias
|
||||||
@func_alias = alias i32 (i32), i32 (i32)* @func
|
@func_alias = alias i32 (i32), i32 (i32)* @func
|
||||||
|
|
||||||
; CHECK-NOT: {{^}}kernel3:
|
; ALL: {{^}}kernel3:
|
||||||
; GCN-NOT: s_swappc_b64
|
; GCN-NOT: s_swappc_b64
|
||||||
; R600: {{^}}kernel3:
|
|
||||||
define amdgpu_kernel void @kernel3(i32 addrspace(1)* %out) {
|
define amdgpu_kernel void @kernel3(i32 addrspace(1)* %out) {
|
||||||
entry:
|
entry:
|
||||||
%tmp0 = call i32 @func_alias(i32 1)
|
%tmp0 = call i32 @func_alias(i32 1)
|
||||||
|
|
Loading…
Reference in New Issue