[clang][HIP] Updating driver to enable archive/bitcode to bitcode linking when targeting HIPAMD toolchain

Differential Revision: https://reviews.llvm.org/D124151
This commit is contained in:
Jacob Lambert 2022-04-21 00:01:15 -07:00
parent 8960ba7491
commit afcc6baac5
5 changed files with 117 additions and 9 deletions

View File

@ -3065,7 +3065,7 @@ class OffloadingActionBuilder final {
// amdgcn does not support linking of object files, therefore we skip
// backend and assemble phases to output LLVM IR. Except for generating
// non-relocatable device coee, where we generate fat binary for device
// non-relocatable device code, where we generate fat binary for device
// code and pass to host in Backend phase.
if (CudaDeviceActions.empty())
return ABRT_Success;
@ -3074,7 +3074,7 @@ class OffloadingActionBuilder final {
CudaDeviceActions.size() == GpuArchList.size()) &&
"Expecting one action per GPU architecture.");
assert(!CompileHostOnly &&
"Not expecting CUDA actions in host-only compilation.");
"Not expecting HIP actions in host-only compilation.");
if (!Relocatable && CurPhase == phases::Backend && !EmitLLVM &&
!EmitAsm) {
@ -3203,12 +3203,16 @@ class OffloadingActionBuilder final {
"Linker inputs and GPU arch list sizes do not match.");
ActionList Actions;
// Append a new link action for each device.
unsigned I = 0;
// Append a new link action for each device.
// Each entry in DeviceLinkerInputs corresponds to a GPU arch.
for (auto &LI : DeviceLinkerInputs) {
// Each entry in DeviceLinkerInputs corresponds to a GPU arch.
auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
types::ID Output = Args.hasArg(options::OPT_emit_llvm)
? types::TY_LLVM_BC
: types::TY_Image;
auto *DeviceLinkAction = C.MakeAction<LinkJobAction>(LI, Output);
// Linking all inputs for the current GPU arch.
// LI contains all the inputs for the linker.
OffloadAction::DeviceDependences DeviceLinkDeps;
@ -3220,6 +3224,12 @@ class OffloadingActionBuilder final {
}
DeviceLinkerInputs.clear();
// If emitting LLVM, do not generate final host/device compilation action
if (Args.hasArg(options::OPT_emit_llvm)) {
AL.append(Actions);
return;
}
// Create a host object from all the device images by embedding them
// in a fat binary for mixed host-device compilation. For device-only
// compilation, creates a fat binary.
@ -3747,7 +3757,8 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
if (FinalPhase == phases::Link) {
if (Args.hasArg(options::OPT_emit_llvm))
// Emitting LLVM while linking is disabled, except in the HIPAMD toolchain.
if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link))
Diag(clang::diag::err_drv_emit_llvm_link);
if (IsCLMode() && LTOMode != LTOK_None &&
!Args.getLastArgValue(options::OPT_fuse_ld_EQ)
@ -3932,7 +3943,10 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
// Queue linker inputs.
if (Phase == phases::Link) {
assert(Phase == PL.back() && "linking must be final compilation step.");
LinkerInputs.push_back(Current);
// We don't need to generate additional link commands if emitting AMD bitcode
if (!(C.getInputArgs().hasArg(options::OPT_hip_link) &&
(C.getInputArgs().hasArg(options::OPT_emit_llvm))))
LinkerInputs.push_back(Current);
Current = nullptr;
break;
}

View File

@ -72,6 +72,36 @@ static bool shouldSkipSanitizeOption(const ToolChain &TC,
return false;
}
/// Build the llvm-link invocation used for HIP bitcode-to-bitcode linking.
///
/// The command is given "-o <Output>" followed by the filename of every entry
/// in \p Inputs, plus whatever AddStaticDeviceLibsLinking appends for the
/// "amdgcn" arch and the target ID taken from -mcpu=.
///
/// \param C      Compilation that receives the new command.
/// \param JA     Job action the command is created for.
/// \param Inputs Files handed to llvm-link; must not be empty.
/// \param Output Destination of the linked bitcode file.
/// \param Args   Driver arguments; the last -mcpu= value is used as target ID.
void AMDGCN::Linker::constructLlvmLinkCommand(Compilation &C,
                                              const JobAction &JA,
                                              const InputInfoList &Inputs,
                                              const InputInfo &Output,
                                              const llvm::opt::ArgList &Args) const {
  assert(!Inputs.empty() && "Must have at least one input.");

  // Construct llvm-link command.
  // The output from llvm-link is a bitcode file.
  ArgStringList LlvmLinkArgs;
  LlvmLinkArgs.append({"-o", Output.getFilename()});
  // Bind by const reference: only the filename is read, so there is no reason
  // to copy each InputInfo on every iteration.
  for (const auto &Input : Inputs)
    LlvmLinkArgs.push_back(Input.getFilename());

  // Look for archive of bundled bitcode in arguments, and add temporary files
  // for the extracted archive of bitcode to inputs.
  auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
  AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LlvmLinkArgs, "amdgcn",
                             TargetID,
                             /*IsBitCodeSDL=*/true,
                             /*PostClangLink=*/false);

  const char *LlvmLink =
      Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
                                         LlvmLink, LlvmLinkArgs, Inputs,
                                         Output));
}
void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const InputInfo &Output,
@ -135,7 +165,8 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
}
// For amdgcn the inputs of the linker job are device bitcode and output is
// object file. It calls llvm-link, opt, llc, then lld steps.
// either an object file or bitcode (-emit-llvm). It calls llvm-link, opt,
// llc, then lld steps.
void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
@ -151,6 +182,9 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
return HIP::constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs,
Args, *this);
if (JA.getType() == types::TY_LLVM_BC)
return constructLlvmLinkCommand(C, JA, Inputs, Output, Args);
return constructLldCommand(C, JA, Inputs, Output, Args);
}

View File

@ -36,6 +36,10 @@ private:
void constructLldCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs, const InputInfo &Output,
const llvm::opt::ArgList &Args) const;
// Adds an llvm-link command to C that links the Inputs into the bitcode file
// Output. ConstructJob dispatches here when the job's type is
// types::TY_LLVM_BC.
void constructLlvmLinkCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const InputInfo &Output,
const llvm::opt::ArgList &Args) const;
};
} // end namespace AMDGCN

View File

@ -0,0 +1,34 @@
// REQUIRES: clang-driver, x86-registered-target, amdgpu-registered-target
// Check that clang unbundles the two bitcodes and links via llvm-link
// RUN: touch %T/bundle1.bc
// RUN: touch %T/bundle2.bc
// RUN: %clang -### --offload-arch=gfx906 --hip-link \
// RUN: -emit-llvm -fgpu-rdc --cuda-device-only \
// RUN: %T/bundle1.bc %T/bundle2.bc \
// RUN: 2>&1 | FileCheck -check-prefix=BITCODE %s
// BITCODE: "{{.*}}clang-offload-bundler" "-type=bc" "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx906" "-input={{.*}}bundle1.bc" "-output=[[B1HOST:.*\.bc]]" "-output=[[B1DEV1:.*\.bc]]" "-unbundle" "-allow-missing-bundles"
// BITCODE: "{{.*}}clang-{{.*}}" "-o" "[[B1DEV2:.*bundle1-gfx906.bc]]" "-x" "ir" "[[B1DEV1]]"
// BITCODE: "{{.*}}clang-offload-bundler" "-type=bc" "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx906" "-input={{.*}}bundle2.bc" "-output=[[B2HOST:.*\.bc]]" "-output=[[B2DEV1:.*\.bc]]" "-unbundle" "-allow-missing-bundles"
// BITCODE: "{{.*}}clang-{{.*}}" "-o" "[[B2DEV2:.*bundle2-gfx906.bc]]" "-x" "ir" "[[B2DEV1]]"
// BITCODE: "{{.*}}llvm-link" "-o" "bundle1-hip-amdgcn-amd-amdhsa-gfx906.bc" "[[B1DEV2]]" "[[B2DEV2]]"
// Check that clang unbundles the bitcode and archive and links via llvm-link
// RUN: touch %T/libhipbundle.a
// RUN: touch %T/bundle.bc
// RUN: %clang -### --offload-arch=gfx906 --hip-link \
// RUN: -emit-llvm -fgpu-rdc --cuda-device-only \
// RUN: %T/bundle.bc -L%T -lhipbundle \
// RUN: 2>&1 | FileCheck -check-prefix=ARCHIVE %s
// ARCHIVE: "{{.*}}clang-offload-bundler" "-type=bc" "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx906" "-input={{.*}}bundle.bc" "-output=[[HOST:.*\.bc]]" "-output=[[DEV1:.*\.bc]]" "-unbundle" "-allow-missing-bundles"
// ARCHIVE: "{{.*}}clang-{{.*}}" "-o" "[[DEV2:.*\.bc]]" "-x" "ir" "[[DEV1]]"
// ARCHIVE: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}libhipbundle.a" "-targets=hip-amdgcn-amd-amdhsa-gfx906" "-output=[[AR:.*\.a]]" "-allow-missing-bundles" "-hip-openmp-compatible"
// ARCHIVE: "{{.*}}llvm-link" "-o" "bundle-hip-amdgcn-amd-amdhsa-gfx906.bc" "[[DEV2]]" "[[AR]]"

View File

@ -520,3 +520,25 @@
// MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx803)
// MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx900)
// MIXED2-NEG-NOT: input, "{{.*}}empty.cpp", c++
// Test HIP bitcode to bitcode linking. Input should be bundled or unbundled bitcode, and
// output should be unbundled linked bitcode
// RUN: touch %T/bitcodeA.bc
// RUN: touch %T/bitcodeB.bc
// RUN: %clang -ccc-print-phases --hip-link -emit-llvm --cuda-device-only \
// RUN: --offload-arch=gfx906 %T/bitcodeA.bc %T/bitcodeB.bc 2>&1 \
// RUN: | FileCheck -check-prefixes=CHECK %s
// CHECK: [[A0:[0-9]+]]: input, "{{.*}}bitcodeA.bc", ir
// CHECK: [[A1:[0-9]+]]: clang-offload-unbundler, {[[A0]]}, ir
// CHECK: [[A2:[0-9]+]]: compiler, {[[A1]]}, ir, (device-hip, [[ARCH:gfx906]])
// CHECK: [[A3:[0-9]+]]: backend, {[[A2]]}, ir, (device-hip, [[ARCH]])
// CHECK: [[B0:[0-9]+]]: input, "{{.*}}bitcodeB.bc", ir
// CHECK: [[B1:[0-9]+]]: clang-offload-unbundler, {[[B0]]}, ir
// CHECK: [[B2:[0-9]+]]: compiler, {[[B1]]}, ir, (device-hip, [[ARCH]])
// CHECK: [[B3:[0-9]+]]: backend, {[[B2]]}, ir, (device-hip, [[ARCH]])
// CHECK: [[L0:[0-9]+]]: linker, {[[A3]], [[B3]]}, ir, (device-hip, [[ARCH]])
// CHECK: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[L0]]}, ir