forked from OSchip/llvm-project
[CUDA] Handle -O options (more) correctly.
Summary: Previously we'd crash the driver if you passed -O0. Now we try to handle all of clang's various optimization flags in a sane way. Reviewers: tra Subscribers: cfe-commits, echristo, jhen Differential Revision: http://reviews.llvm.org/D16307 llvm-svn: 258174
This commit is contained in:
parent
0872e46c9d
commit
2836dcdb75
|
@ -10663,10 +10663,35 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
ArgStringList CmdArgs;
|
||||
CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
|
||||
|
||||
// Clang's default optimization level is -O0, but ptxas's default is -O3.
|
||||
CmdArgs.push_back(Args.MakeArgString(
|
||||
llvm::Twine("-O") +
|
||||
Args.getLastArgValue(options::OPT_O_Group, "0").data()));
|
||||
// Map the -O we received to -O{0,1,2,3}.
|
||||
//
|
||||
// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's default,
|
||||
// so it may correspond more closely to the spirit of clang -O2.
|
||||
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
|
||||
// -O3 seems like the least-bad option when -Osomething is specified to
|
||||
// clang but it isn't handled below.
|
||||
StringRef OOpt = "3";
|
||||
if (A->getOption().matches(options::OPT_O4) ||
|
||||
A->getOption().matches(options::OPT_Ofast))
|
||||
OOpt = "3";
|
||||
else if (A->getOption().matches(options::OPT_O0))
|
||||
OOpt = "0";
|
||||
else if (A->getOption().matches(options::OPT_O)) {
|
||||
// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
|
||||
OOpt = llvm::StringSwitch<const char *>(A->getValue())
|
||||
.Case("1", "1")
|
||||
.Case("2", "2")
|
||||
.Case("3", "3")
|
||||
.Case("s", "2")
|
||||
.Case("z", "2")
|
||||
.Default("2");
|
||||
}
|
||||
CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
|
||||
} else {
|
||||
// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
|
||||
// to no optimizations, but ptxas's default is -O3.
|
||||
CmdArgs.push_back("-O0");
|
||||
}
|
||||
|
||||
// Don't bother passing -g to ptxas: It's enabled by default at -O0, and
|
||||
// not supported at other optimization levels.
|
||||
|
|
|
@ -4,14 +4,31 @@
|
|||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: nvptx-registered-target
|
||||
|
||||
// Regular compile with -O2.
|
||||
// Regular compiles with -O{0,1,2,3,4,fast}. -O4 and -Ofast map to ptxas O3.
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT1 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
|
||||
|
||||
// Regular compile without -O. This should result in us passing -O0 to ptxas.
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
|
||||
|
||||
// Regular compiles with -Os and -Oz. For lack of a better option, we map
|
||||
// these to ptxas -O2.
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
|
||||
|
||||
// Regular compile targeting sm_35.
|
||||
// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
|
||||
|
@ -42,7 +59,9 @@
|
|||
// ARCH64: "-m64"
|
||||
// ARCH32: "-m32"
|
||||
// OPT0: "-O0"
|
||||
// OPT1: "-O1"
|
||||
// OPT2: "-O2"
|
||||
// OPT3: "-O3"
|
||||
// SM20: "--gpu-name" "sm_20"
|
||||
// SM35: "--gpu-name" "sm_35"
|
||||
// SM20: "--output-file" "[[CUBINFILE:[^"]*]]"
|
||||
|
|
Loading…
Reference in New Issue