forked from OSchip/llvm-project
[CUDA][HIP] Add -Xarch_device and -Xarch_host options
The argument after -Xarch_device will be added to the arguments for CUDA/HIP device compilation and will be removed for host compilation. The argument after -Xarch_host will be added to the arguments for CUDA/HIP host compilation and will be removed for device compilation. Differential Revision: https://reviews.llvm.org/D76520
This commit is contained in:
parent
d381b6a8d3
commit
2ae25647d1
|
@ -466,6 +466,10 @@ def Xanalyzer : Separate<["-"], "Xanalyzer">,
|
|||
HelpText<"Pass <arg> to the static analyzer">, MetaVarName<"<arg>">,
|
||||
Group<StaticAnalyzer_Group>;
|
||||
// Generic -Xarch_<arch> <arg> form. The driver's translation code reads
// value 0 of this option as the architecture name (compared against the bound
// architecture) and value 1 as the argument to re-parse and forward.
def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[DriverOption]>;
|
||||
// -Xarch_host <arg>: <arg> is forwarded to the CUDA/HIP host compilation and
// dropped for device compilation.
def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[DriverOption]>,
|
||||
HelpText<"Pass <arg> to the CUDA/HIP host compilation">, MetaVarName<"<arg>">;
|
||||
// -Xarch_device <arg>: <arg> is forwarded to the CUDA/HIP device compilation
// and dropped for host compilation.
def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[DriverOption]>,
|
||||
HelpText<"Pass <arg> to the CUDA/HIP device compilation">, MetaVarName<"<arg>">;
|
||||
// -Xassembler <arg>: passes the following <arg> to the assembler; the option
// is a member of CompileOnly_Group.
def Xassembler : Separate<["-"], "Xassembler">,
|
||||
HelpText<"Pass <arg> to the assembler">, MetaVarName<"<arg>">,
|
||||
Group<CompileOnly_Group>;
|
||||
|
|
|
@ -296,10 +296,20 @@ public:
|
|||
SmallVectorImpl<llvm::opt::Arg *> &AllocatedArgs) const;
|
||||
|
||||
/// Append the argument following \p A to \p DAL assuming \p A is an Xarch
|
||||
/// argument.
|
||||
virtual void TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
|
||||
llvm::opt::Arg *&A,
|
||||
llvm::opt::DerivedArgList *DAL) const;
|
||||
/// argument. If \p AllocatedArgs is a null pointer, synthesized arguments are
|
||||
/// added to \p DAL, otherwise they are appended to \p AllocatedArgs.
|
||||
virtual void TranslateXarchArgs(
|
||||
const llvm::opt::DerivedArgList &Args, llvm::opt::Arg *&A,
|
||||
llvm::opt::DerivedArgList *DAL,
|
||||
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs = nullptr) const;
|
||||
|
||||
/// Translate -Xarch_ arguments. If there are no such arguments, return
|
||||
/// a null pointer, otherwise return a DerivedArgList containing the
|
||||
/// translated arguments.
///
/// In the ToolChain implementation, -Xarch_device and -Xarch_host are
/// translated or dropped depending on whether \p DeviceOffloadKind is CUDA or
/// HIP, and -Xarch_<arch> is only examined for CUDA/HIP, where it is
/// translated when <arch> equals a non-empty \p BoundArch and dropped
/// otherwise. All other arguments are copied to the result unchanged.
///
/// \param Args the argument list to scan.
/// \param BoundArch the architecture -Xarch_<arch> arguments are matched
/// against.
/// \param DeviceOffloadKind the offload kind of the compilation being set up.
/// \param AllocatedArgs if non-null, receives the arguments synthesized
/// during translation; if null they are added to the returned list instead.
/// \returns a list allocated with \c new (not freed by this function), or a
/// null pointer when nothing was translated or dropped.
|
||||
virtual llvm::opt::DerivedArgList *
|
||||
TranslateXarchArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
|
||||
Action::OffloadKind DeviceOffloadKind,
|
||||
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const;
|
||||
|
||||
/// Choose a tool to use to handle the action \p JA.
|
||||
///
|
||||
|
|
|
@ -76,16 +76,29 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch,
|
|||
*TranslatedArgs, SameTripleAsHost, AllocatedArgs);
|
||||
}
|
||||
|
||||
DerivedArgList *NewDAL = nullptr;
|
||||
if (!OpenMPArgs) {
|
||||
NewDAL = TC->TranslateXarchArgs(*TranslatedArgs, BoundArch,
|
||||
DeviceOffloadKind, &AllocatedArgs);
|
||||
} else {
|
||||
NewDAL = TC->TranslateXarchArgs(*OpenMPArgs, BoundArch, DeviceOffloadKind,
|
||||
&AllocatedArgs);
|
||||
if (!NewDAL)
|
||||
NewDAL = OpenMPArgs;
|
||||
else
|
||||
delete OpenMPArgs;
|
||||
}
|
||||
|
||||
if (!NewDAL) {
|
||||
Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch, DeviceOffloadKind);
|
||||
if (!Entry)
|
||||
Entry = TranslatedArgs;
|
||||
} else {
|
||||
Entry = TC->TranslateArgs(*OpenMPArgs, BoundArch, DeviceOffloadKind);
|
||||
Entry = TC->TranslateArgs(*NewDAL, BoundArch, DeviceOffloadKind);
|
||||
if (!Entry)
|
||||
Entry = OpenMPArgs;
|
||||
Entry = NewDAL;
|
||||
else
|
||||
delete OpenMPArgs;
|
||||
delete NewDAL;
|
||||
}
|
||||
|
||||
// Add allocated arguments to the final DAL.
|
||||
|
|
|
@ -1103,11 +1103,20 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOpenMPTargetArgs(
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
void ToolChain::TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
|
||||
llvm::opt::Arg *&A,
|
||||
llvm::opt::DerivedArgList *DAL) const {
|
||||
// TODO: Currently argument values separated by space e.g.
|
||||
// -Xclang -mframe-pointer=no cannot be passed by -Xarch_. This should be
|
||||
// fixed.
|
||||
void ToolChain::TranslateXarchArgs(
|
||||
const llvm::opt::DerivedArgList &Args, llvm::opt::Arg *&A,
|
||||
llvm::opt::DerivedArgList *DAL,
|
||||
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const {
|
||||
const OptTable &Opts = getDriver().getOpts();
|
||||
unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
|
||||
unsigned ValuePos = 1;
|
||||
if (A->getOption().matches(options::OPT_Xarch_device) ||
|
||||
A->getOption().matches(options::OPT_Xarch_host))
|
||||
ValuePos = 0;
|
||||
|
||||
unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(ValuePos));
|
||||
unsigned Prev = Index;
|
||||
std::unique_ptr<llvm::opt::Arg> XarchArg(Opts.ParseOneArg(Args, Index));
|
||||
|
||||
|
@ -1130,5 +1139,49 @@ void ToolChain::TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
|
|||
}
|
||||
XarchArg->setBaseArg(A);
|
||||
A = XarchArg.release();
|
||||
if (!AllocatedArgs)
|
||||
DAL->AddSynthesizedArg(A);
|
||||
else
|
||||
AllocatedArgs->push_back(A);
|
||||
}
|
||||
|
||||
/// Resolve the -Xarch_host, -Xarch_device and -Xarch_<arch> options found in
/// \p Args for a compilation whose offload kind is \p OFK.
///
/// The arguments are visited in order:
///  - -Xarch_device is translated for CUDA/HIP and dropped otherwise;
///  - -Xarch_host is dropped for CUDA/HIP and translated otherwise;
///  - -Xarch_<arch> is only examined for CUDA/HIP: it is translated when
///    <arch> equals a non-empty \p BoundArch and dropped when it does not;
///  - every other argument is copied through untouched.
///
/// \param AllocatedArgs forwarded to the per-argument translation, which
/// records synthesized arguments there when it is non-null.
/// \returns a newly allocated list when at least one argument was selected
/// for translation or dropped; a null pointer when \p Args needed no change.
llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs(
    const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
    Action::OffloadKind OFK,
    SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const {
  const bool ForGPU = OFK == Action::OFK_Cuda || OFK == Action::OFK_HIP;

  // Owned locally until we know whether the caller actually needs it.
  std::unique_ptr<DerivedArgList> Result(
      new DerivedArgList(Args.getBaseArgs()));
  bool Changed = false;

  for (Arg *Cur : Args) {
    const auto &Opt = Cur->getOption();
    bool Translate = false;
    bool Drop = false;

    if (Opt.matches(options::OPT_Xarch_device)) {
      Translate = ForGPU;
      Drop = !ForGPU;
    } else if (Opt.matches(options::OPT_Xarch_host)) {
      Translate = !ForGPU;
      Drop = ForGPU;
    } else if (ForGPU && Opt.matches(options::OPT_Xarch__)) {
      // -Xarch_<arch> is left alone for non CUDA/HIP toolchains because they
      // may need their own special translation. For CUDA/HIP it only applies
      // when its architecture is the one currently bound.
      const bool ArchMatches =
          !BoundArch.empty() && BoundArch == Cur->getValue(0);
      Translate = ArchMatches;
      Drop = !ArchMatches;
    }

    Changed |= Translate || Drop;

    // Translation replaces Cur with the argument that followed the -Xarch
    // option, so the append below adds the translated form.
    if (Translate)
      TranslateXarchArgs(Args, Cur, Result.get(), AllocatedArgs);

    if (Drop)
      continue;
    Result->append(Cur);
  }

  // An untouched list is discarded so callers keep using the original.
  return Changed ? Result.release() : nullptr;
}
|
||||
|
|
|
@ -800,12 +800,6 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
|
|||
}
|
||||
|
||||
for (Arg *A : Args) {
|
||||
if (A->getOption().matches(options::OPT_Xarch__)) {
|
||||
// Skip this argument unless the architecture matches BoundArch
|
||||
if (BoundArch.empty() || A->getValue(0) != BoundArch)
|
||||
continue;
|
||||
TranslateXarchArgs(Args, A, DAL);
|
||||
}
|
||||
DAL->append(A);
|
||||
}
|
||||
|
||||
|
|
|
@ -378,12 +378,6 @@ HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
|
|||
const OptTable &Opts = getDriver().getOpts();
|
||||
|
||||
for (Arg *A : Args) {
|
||||
if (A->getOption().matches(options::OPT_Xarch__)) {
|
||||
// Skip this argument unless the architecture matches BoundArch.
|
||||
if (BoundArch.empty() || A->getValue(0) != BoundArch)
|
||||
continue;
|
||||
TranslateXarchArgs(Args, A, DAL);
|
||||
}
|
||||
DAL->append(A);
|
||||
}
|
||||
|
||||
|
|
|
@ -13,3 +13,16 @@
|
|||
// RUN: -mllvm -amdgpu-early-inline-all=true %s 2>&1 | \
|
||||
// RUN: FileCheck -check-prefix=MLLVM %s
|
||||
// MLLVM-NOT: "-mllvm"{{.*}}"-amdgpu-early-inline-all=true"{{.*}}"-mllvm"{{.*}}"-amdgpu-early-inline-all=true"
|
||||
|
||||
// RUN: %clang -### -Xarch_device -g -nogpulib --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -Xarch_device -fcf-protection=branch \
|
||||
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEV %s
|
||||
// DEV: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
|
||||
// DEV: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
|
||||
// DEV-NOT: clang{{.*}} {{.*}} "-debug-info-kind={{.*}}"
|
||||
|
||||
// RUN: %clang -### -Xarch_host -g -nogpulib --cuda-gpu-arch=gfx900 \
|
||||
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=HOST %s
|
||||
// HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
|
||||
// HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
|
||||
// HOST: clang{{.*}} "-debug-info-kind={{.*}}"
|
||||
|
|
Loading…
Reference in New Issue