forked from OSchip/llvm-project
[AArch64] Always add -tune-cpu argument to -cc1 driver
This patch ensures that we always tune for a given CPU on AArch64 targets when the user specifies the "-mtune=xyz" flag. In AArch64Subtarget, if the tune CPU is unset, we use the CPU value instead. I've updated the release notes in llvm/docs/ReleaseNotes.rst and added tests in clang/test/Driver/aarch64-mtune.c. Differential Revision: https://reviews.llvm.org/D110258
This commit is contained in:
parent
ca889733a2
commit
607fb1bb8c
|
@ -192,6 +192,13 @@ Arm and AArch64 Support in Clang
|
|||
|
||||
- Support has been added for the following processors (command-line identifiers in parentheses):
|
||||
- Arm Cortex-A510 (``cortex-a510``)
|
||||
- The -mtune flag is no longer ignored for AArch64. It is now possible to
|
||||
tune code generation for a particular CPU with -mtune without setting any
|
||||
architectural features. For example, compiling with
|
||||
"-mcpu=generic -mtune=cortex-a57" will not enable any Cortex-A57-specific
|
||||
architectural features, but will enable certain optimizations specific to
|
||||
Cortex-A57 CPUs and enable the use of a more accurate scheduling model.
|
||||
|
||||
|
||||
Internal API Changes
|
||||
--------------------
|
||||
|
|
|
@ -1833,6 +1833,21 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args,
|
|||
}
|
||||
|
||||
AddAAPCSVolatileBitfieldArgs(Args, CmdArgs);
|
||||
|
||||
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) {
|
||||
StringRef Name = A->getValue();
|
||||
|
||||
std::string TuneCPU;
|
||||
if (Name == "native")
|
||||
TuneCPU = std::string(llvm::sys::getHostCPUName());
|
||||
else
|
||||
TuneCPU = std::string(Name);
|
||||
|
||||
if (!TuneCPU.empty()) {
|
||||
CmdArgs.push_back("-tune-cpu");
|
||||
CmdArgs.push_back(Args.MakeArgString(TuneCPU));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Clang::AddMIPSTargetArgs(const ArgList &Args,
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
// Ensure we support the -mtune flag.
|
||||
|
||||
// Without -mtune, the driver shouldn't pass a default -tune-cpu to cc1.
|
||||
// RUN: %clang -target aarch64-unknown-unknown -c -### %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix=NOTUNE
|
||||
// NOTUNE-NOT: "-tune-cpu" "generic"
|
||||
|
||||
// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mtune=generic 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix=GENERIC
|
||||
// GENERIC: "-tune-cpu" "generic"
|
||||
|
||||
// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mtune=neoverse-n1 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix=NEOVERSE-N1
|
||||
// NEOVERSE-N1: "-tune-cpu" "neoverse-n1"
|
||||
|
||||
// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mtune=thunderx2t99 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix=THUNDERX2T99
|
||||
// THUNDERX2T99: "-tune-cpu" "thunderx2t99"
|
||||
|
||||
// Check interaction between march and mtune.
|
||||
|
||||
// RUN: %clang -target aarch64-unknown-unknown -c -### %s -march=armv8-a 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix=MARCHARMV8A
|
||||
// MARCHARMV8A: "-target-cpu" "generic"
|
||||
// MARCHARMV8A-NOT: "-tune-cpu" "generic"
|
||||
|
||||
// RUN: %clang -target aarch64-unknown-unknown -c -### %s -march=armv8-a -mtune=cortex-a75 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix=MARCHARMV8A-A75
|
||||
// MARCHARMV8A-A75: "-target-cpu" "generic"
|
||||
// MARCHARMV8A-A75: "-tune-cpu" "cortex-a75"
|
||||
|
||||
// Check interaction between mcpu and mtune.
|
||||
|
||||
// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mcpu=thunderx 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix=MCPUTHUNDERX
|
||||
// MCPUTHUNDERX: "-target-cpu" "thunderx"
|
||||
// MCPUTHUNDERX-NOT: "-tune-cpu"
|
||||
|
||||
// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mcpu=cortex-a75 -mtune=cortex-a57 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix=MCPUA75-MTUNEA57
|
||||
// MCPUA75-MTUNEA57: "-target-cpu" "cortex-a75"
|
||||
// MCPUA75-MTUNEA57: "-tune-cpu" "cortex-a57"
|
|
@ -74,6 +74,10 @@ Changes to the AArch64 Backend
|
|||
------------------------------
|
||||
|
||||
* Added support for the Armv9-A, Armv9.1-A and Armv9.2-A architectures.
|
||||
* The compiler now recognises the "tune-cpu" function attribute to support
|
||||
the use of the -mtune frontend flag. This allows certain scheduling features
|
||||
and optimisations to be enabled independently of the architecture. If the
|
||||
"tune-cpu" attribute is absent, the compiler tunes according to the "target-cpu".
|
||||
|
||||
Changes to the ARM Backend
|
||||
--------------------------
|
||||
|
|
|
@ -50,15 +50,17 @@ static cl::opt<bool>
|
|||
static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
|
||||
cl::desc("Enable the use of AA during codegen."));
|
||||
|
||||
AArch64Subtarget &
|
||||
AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
|
||||
StringRef CPUString) {
|
||||
AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
|
||||
StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
|
||||
// Determine default and user-specified characteristics
|
||||
|
||||
if (CPUString.empty())
|
||||
CPUString = "generic";
|
||||
|
||||
ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS);
|
||||
if (TuneCPUString.empty())
|
||||
TuneCPUString = CPUString;
|
||||
|
||||
ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
|
||||
initializeProperties();
|
||||
|
||||
return *this;
|
||||
|
@ -198,18 +200,20 @@ void AArch64Subtarget::initializeProperties() {
|
|||
}
|
||||
|
||||
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
|
||||
const std::string &TuneCPU,
|
||||
const std::string &FS,
|
||||
const TargetMachine &TM, bool LittleEndian,
|
||||
unsigned MinSVEVectorSizeInBitsOverride,
|
||||
unsigned MaxSVEVectorSizeInBitsOverride)
|
||||
: AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
|
||||
: AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
|
||||
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
|
||||
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
|
||||
IsLittle(LittleEndian),
|
||||
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
|
||||
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
|
||||
FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU)),
|
||||
TSInfo(), TLInfo(TM, *this) {
|
||||
FrameLowering(),
|
||||
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)), TSInfo(),
|
||||
TLInfo(TM, *this) {
|
||||
if (AArch64::isX18ReservedByDefault(TT))
|
||||
ReserveXRegister.set(18);
|
||||
|
||||
|
|
|
@ -298,7 +298,8 @@ private:
|
|||
/// passed in feature string so that we can use initializer lists for
|
||||
/// subtarget initialization.
|
||||
AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
|
||||
StringRef CPUString);
|
||||
StringRef CPUString,
|
||||
StringRef TuneCPUString);
|
||||
|
||||
/// Initialize properties based on the selected processor family.
|
||||
void initializeProperties();
|
||||
|
@ -307,8 +308,8 @@ public:
|
|||
/// This constructor initializes the data members to match that
|
||||
/// of the specified triple.
|
||||
AArch64Subtarget(const Triple &TT, const std::string &CPU,
|
||||
const std::string &FS, const TargetMachine &TM,
|
||||
bool LittleEndian,
|
||||
const std::string &TuneCPU, const std::string &FS,
|
||||
const TargetMachine &TM, bool LittleEndian,
|
||||
unsigned MinSVEVectorSizeInBitsOverride = 0,
|
||||
unsigned MaxSVEVectorSizeInBitsOverride = 0);
|
||||
|
||||
|
|
|
@ -355,10 +355,13 @@ AArch64TargetMachine::~AArch64TargetMachine() = default;
|
|||
const AArch64Subtarget *
|
||||
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
|
||||
Attribute CPUAttr = F.getFnAttribute("target-cpu");
|
||||
Attribute TuneAttr = F.getFnAttribute("tune-cpu");
|
||||
Attribute FSAttr = F.getFnAttribute("target-features");
|
||||
|
||||
std::string CPU =
|
||||
CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
|
||||
std::string TuneCPU =
|
||||
TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
|
||||
std::string FS =
|
||||
FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
|
||||
|
||||
|
@ -399,6 +402,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
|
|||
Key += "SVEMax";
|
||||
Key += std::to_string(MaxSVEVectorSize);
|
||||
Key += CPU;
|
||||
Key += TuneCPU;
|
||||
Key += FS;
|
||||
|
||||
auto &I = SubtargetMap[Key];
|
||||
|
@ -407,8 +411,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
|
|||
// creation will depend on the TM and the code generation flags on the
|
||||
// function that reside in TargetOptions.
|
||||
resetTargetOptions(F);
|
||||
I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
|
||||
isLittle, MinSVEVectorSize,
|
||||
I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, TuneCPU, FS,
|
||||
*this, isLittle, MinSVEVectorSize,
|
||||
MaxSVEVectorSize);
|
||||
}
|
||||
return I.get();
|
||||
|
|
|
@ -29,6 +29,7 @@ std::unique_ptr<LLVMTargetMachine> createTargetMachine() {
|
|||
|
||||
std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) {
|
||||
AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
|
||||
std::string(TM->getTargetCPU()),
|
||||
std::string(TM->getTargetFeatureString()), *TM,
|
||||
/* isLittle */ false);
|
||||
return std::make_unique<AArch64InstrInfo>(ST);
|
||||
|
|
|
@ -26,6 +26,7 @@ std::unique_ptr<LLVMTargetMachine> createTargetMachine() {
|
|||
|
||||
std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) {
|
||||
AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
|
||||
std::string(TM->getTargetCPU()),
|
||||
std::string(TM->getTargetFeatureString()), *TM,
|
||||
/* isLittle */ false);
|
||||
return std::make_unique<AArch64InstrInfo>(ST);
|
||||
|
|
Loading…
Reference in New Issue