forked from OSchip/llvm-project
[X86] Add support 'tune' in target attribute
This adds parsing and codegen support for tune in target attribute. I've implemented this so that arch in the target attribute implicitly disables tune from the command line. I'm not sure what gcc does here. But since -march implies -mtune. I assume 'arch' in the target attribute implies tune in the target attribute. Differential Revision: https://reviews.llvm.org/D86187
This commit is contained in:
parent
4a36711439
commit
724f570ad2
|
@ -334,11 +334,17 @@ static_assert(sizeof(ParamIdx) == sizeof(ParamIdx::SerialType),
|
|||
struct ParsedTargetAttr {
|
||||
std::vector<std::string> Features;
|
||||
StringRef Architecture;
|
||||
StringRef Tune;
|
||||
StringRef BranchProtection;
|
||||
bool DuplicateArchitecture = false;
|
||||
bool DuplicateTune = false;
|
||||
bool operator ==(const ParsedTargetAttr &Other) const {
|
||||
return DuplicateArchitecture == Other.DuplicateArchitecture &&
|
||||
Architecture == Other.Architecture && Features == Other.Features;
|
||||
DuplicateTune == Other.DuplicateTune &&
|
||||
Architecture == Other.Architecture &&
|
||||
Tune == Other.Tune &&
|
||||
BranchProtection == Other.BranchProtection &&
|
||||
Features == Other.Features;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -2346,11 +2346,10 @@ def Target : InheritableAttr {
|
|||
// accepting it weirdly.
|
||||
Feature = Feature.trim();
|
||||
|
||||
// We don't support cpu tuning this way currently.
|
||||
// TODO: Support the fpmath option. It will require checking
|
||||
// overall feature validity for the function with the rest of the
|
||||
// attributes on the function.
|
||||
if (Feature.startswith("fpmath=") || Feature.startswith("tune="))
|
||||
if (Feature.startswith("fpmath="))
|
||||
continue;
|
||||
|
||||
if (Feature.startswith("branch-protection=")) {
|
||||
|
@ -2364,6 +2363,11 @@ def Target : InheritableAttr {
|
|||
Ret.DuplicateArchitecture = true;
|
||||
else
|
||||
Ret.Architecture = Feature.split("=").second.trim();
|
||||
} else if (Feature.startswith("tune=")) {
|
||||
if (!Ret.Tune.empty())
|
||||
Ret.DuplicateTune = true;
|
||||
else
|
||||
Ret.Tune = Feature.split("=").second.trim();
|
||||
} else if (Feature.startswith("no-"))
|
||||
Ret.Features.push_back("-" + Feature.split("-").second.str());
|
||||
else
|
||||
|
|
|
@ -1898,6 +1898,9 @@ the target with or without a "-mno-" in front corresponding to the absence
|
|||
of the feature, as well as ``arch="CPU"`` which will change the default "CPU"
|
||||
for the function.
|
||||
|
||||
For X86, the attribute also allows ``tune="CPU"`` to optimize the generated
|
||||
code for the given CPU without changing the available instructions.
|
||||
|
||||
For AArch64, the attribute also allows the "branch-protection=<args>" option,
|
||||
where the permissible arguments and their effect on code generation are the same
|
||||
as for the command-line option ``-mbranch-protection``.
|
||||
|
|
|
@ -2829,8 +2829,9 @@ def err_attribute_requires_opencl_version : Error<
|
|||
def err_invalid_branch_protection_spec : Error<
|
||||
"invalid or misplaced branch protection specification '%0'">;
|
||||
def warn_unsupported_target_attribute
|
||||
: Warning<"%select{unsupported|duplicate}0%select{| architecture}1 '%2' in"
|
||||
" the 'target' attribute string; 'target' attribute ignored">,
|
||||
: Warning<"%select{unsupported|duplicate|unknown}0%select{| architecture|"
|
||||
" tune CPU}1 '%2' in the 'target' attribute string; 'target' "
|
||||
"attribute ignored">,
|
||||
InGroup<IgnoredAttributes>;
|
||||
def err_attribute_unsupported
|
||||
: Error<"%0 attribute is not supported for this target">;
|
||||
|
|
|
@ -1148,6 +1148,11 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
/// brief Determine whether this TargetInfo supports tune in target attribute.
|
||||
virtual bool supportsTargetAttributeTune() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Use the specified ABI.
|
||||
///
|
||||
/// \return False on error (invalid ABI name).
|
||||
|
|
|
@ -296,6 +296,10 @@ public:
|
|||
return "";
|
||||
}
|
||||
|
||||
bool supportsTargetAttributeTune() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isValidCPUName(StringRef Name) const override {
|
||||
bool Only64Bit = getTriple().getArch() != llvm::Triple::x86;
|
||||
return llvm::X86::parseArchX86(Name, Only64Bit) != llvm::X86::CK_None;
|
||||
|
|
|
@ -1770,9 +1770,14 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
|
|||
// the function.
|
||||
if (TD) {
|
||||
ParsedTargetAttr ParsedAttr = TD->parse();
|
||||
if (ParsedAttr.Architecture != "" &&
|
||||
getTarget().isValidCPUName(ParsedAttr.Architecture))
|
||||
if (!ParsedAttr.Architecture.empty() &&
|
||||
getTarget().isValidCPUName(ParsedAttr.Architecture)) {
|
||||
TargetCPU = ParsedAttr.Architecture;
|
||||
TuneCPU = ""; // Clear the tune CPU.
|
||||
}
|
||||
if (!ParsedAttr.Tune.empty() &&
|
||||
getTarget().isValidCPUName(ParsedAttr.Tune))
|
||||
TuneCPU = ParsedAttr.Tune;
|
||||
}
|
||||
} else {
|
||||
// Otherwise just add the existing target cpu and target features to the
|
||||
|
@ -1780,11 +1785,11 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
|
|||
Features = getTarget().getTargetOpts().Features;
|
||||
}
|
||||
|
||||
if (TargetCPU != "") {
|
||||
if (!TargetCPU.empty()) {
|
||||
Attrs.addAttribute("target-cpu", TargetCPU);
|
||||
AddedAttr = true;
|
||||
}
|
||||
if (TuneCPU != "") {
|
||||
if (!TuneCPU.empty()) {
|
||||
Attrs.addAttribute("tune-cpu", TuneCPU);
|
||||
AddedAttr = true;
|
||||
}
|
||||
|
@ -1826,6 +1831,7 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD,
|
|||
// new ones should replace the old.
|
||||
F->removeFnAttr("target-cpu");
|
||||
F->removeFnAttr("target-features");
|
||||
F->removeFnAttr("tune-cpu");
|
||||
F->addAttributes(llvm::AttributeList::FunctionIndex, Attrs);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3062,23 +3062,36 @@ static void handleCodeSegAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
|
|||
// Check for things we'd like to warn about. Multiversioning issues are
|
||||
// handled later in the process, once we know how many exist.
|
||||
bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
|
||||
enum FirstParam { Unsupported, Duplicate };
|
||||
enum SecondParam { None, Architecture };
|
||||
for (auto Str : {"tune=", "fpmath="})
|
||||
if (AttrStr.find(Str) != StringRef::npos)
|
||||
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
|
||||
<< Unsupported << None << Str;
|
||||
enum FirstParam { Unsupported, Duplicate, Unknown };
|
||||
enum SecondParam { None, Architecture, Tune };
|
||||
if (AttrStr.find("fpmath=") != StringRef::npos)
|
||||
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
|
||||
<< Unsupported << None << "fpmath=";
|
||||
|
||||
// Diagnose use of tune if target doesn't support it.
|
||||
if (!Context.getTargetInfo().supportsTargetAttributeTune() &&
|
||||
AttrStr.find("tune=") != StringRef::npos)
|
||||
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
|
||||
<< Unsupported << None << "tune=";
|
||||
|
||||
ParsedTargetAttr ParsedAttrs = TargetAttr::parse(AttrStr);
|
||||
|
||||
if (!ParsedAttrs.Architecture.empty() &&
|
||||
!Context.getTargetInfo().isValidCPUName(ParsedAttrs.Architecture))
|
||||
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
|
||||
<< Unsupported << Architecture << ParsedAttrs.Architecture;
|
||||
<< Unknown << Architecture << ParsedAttrs.Architecture;
|
||||
|
||||
if (!ParsedAttrs.Tune.empty() &&
|
||||
!Context.getTargetInfo().isValidCPUName(ParsedAttrs.Tune))
|
||||
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
|
||||
<< Unknown << Tune << ParsedAttrs.Tune;
|
||||
|
||||
if (ParsedAttrs.DuplicateArchitecture)
|
||||
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
|
||||
<< Duplicate << None << "arch=";
|
||||
if (ParsedAttrs.DuplicateTune)
|
||||
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
|
||||
<< Duplicate << None << "tune=";
|
||||
|
||||
for (const auto &Feature : ParsedAttrs.Features) {
|
||||
auto CurFeature = StringRef(Feature).drop_front(); // remove + or -.
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -tune-cpu i686 -emit-llvm %s -o - | FileCheck %s
|
||||
|
||||
int baz(int a) { return 4; }
|
||||
|
||||
int __attribute__((target("avx,sse4.2,arch=ivybridge"))) foo(int a) { return 4; }
|
||||
|
||||
int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; }
|
||||
int __attribute__((target("fpmath=387"))) koala(int a) { return 4; }
|
||||
|
||||
int __attribute__((target("no-sse2"))) echidna(int a) { return 4; }
|
||||
|
@ -31,12 +30,11 @@ int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) {
|
|||
return 5;
|
||||
}
|
||||
|
||||
int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; }
|
||||
|
||||
// Check that we emit the additional subtarget and cpu features for foo and not for baz or bar.
|
||||
// CHECK: baz{{.*}} #0
|
||||
// CHECK: foo{{.*}} #1
|
||||
// We ignore the tune attribute so walrus should be identical to baz and bar.
|
||||
// CHECK: walrus{{.*}} #0
|
||||
// We're currently ignoring the fpmath attribute so koala should be identical to baz and bar.
|
||||
// CHECK: koala{{.*}} #0
|
||||
// CHECK: echidna{{.*}} #2
|
||||
|
@ -48,11 +46,16 @@ int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) {
|
|||
// CHECK: qq{{.*}} #6
|
||||
// CHECK: lake{{.*}} #7
|
||||
// CHECK: use_before_def{{.*}} #7
|
||||
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87"
|
||||
// CHECK: walrus{{.*}} #8
|
||||
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686"
|
||||
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
|
||||
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop"
|
||||
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
|
||||
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop"
|
||||
// CHECK-NOT: tune-cpu
|
||||
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
|
||||
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
|
||||
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
|
||||
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
|
||||
// CHECK-NOT: tune-cpu
|
||||
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
|
||||
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
|
||||
// CHECK-NOT: tune-cpu
|
||||
// CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="sandybridge"
|
||||
|
|
|
@ -1,22 +1,34 @@
|
|||
// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify %s
|
||||
// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify %s
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
int __attribute__((target("avx,sse4.2,arch=ivybridge"))) foo() { return 4; }
|
||||
//expected-error@+1 {{'target' attribute takes one argument}}
|
||||
int __attribute__((target())) bar() { return 4; }
|
||||
//expected-warning@+1 {{unsupported 'tune=' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
// no warning, tune is supported for x86
|
||||
int __attribute__((target("tune=sandybridge"))) baz() { return 4; }
|
||||
//expected-warning@+1 {{unsupported 'fpmath=' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("fpmath=387"))) walrus() { return 4; }
|
||||
//expected-warning@+1 {{unsupported architecture 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
//expected-warning@+1 {{unknown architecture 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("avx,sse4.2,arch=hiss"))) meow() { return 4; }
|
||||
//expected-warning@+1 {{unsupported 'woof' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("woof"))) bark() { return 4; }
|
||||
// no warning, same as saying 'nothing'.
|
||||
int __attribute__((target("arch="))) turtle() { return 4; }
|
||||
//expected-warning@+1 {{unsupported architecture 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
//expected-warning@+1 {{unknown architecture 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("arch=hiss,arch=woof"))) pine_tree() { return 4; }
|
||||
//expected-warning@+1 {{duplicate 'arch=' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("arch=ivybridge,arch=haswell"))) oak_tree() { return 4; }
|
||||
//expected-warning@+1 {{unsupported 'branch-protection' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("branch-protection=none"))) birch_tree() { return 5; }
|
||||
//expected-warning@+1 {{unknown tune CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("tune=hiss,tune=woof"))) apple_tree() { return 4; }
|
||||
|
||||
#else
|
||||
|
||||
// tune is not supported by other targets.
|
||||
//expected-warning@+1 {{unsupported 'tune=' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("tune=hiss"))) baz() { return 4; }
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue