forked from OSchip/llvm-project
[X86] Rename Subtarget Tuning Feature Flag Prefix. NFC.
As suggested on D107370, this patch renames the tuning feature flags to start with 'Tuning' instead of 'Feature'. Differential Revision: https://reviews.llvm.org/D107459
This commit is contained in:
parent
cc947e29ea
commit
e78bf49a58
|
@ -1086,11 +1086,11 @@ unsigned X86AsmBackend::getMaximumNopSize() const {
|
|||
return 4;
|
||||
if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit))
|
||||
return 1;
|
||||
if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
|
||||
if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
|
||||
return 7;
|
||||
if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
|
||||
if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
|
||||
return 15;
|
||||
if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
|
||||
if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
|
||||
return 11;
|
||||
// FIXME: handle 32-bit mode
|
||||
// 15-bytes is the longest single NOP instruction, but 10-bytes is
|
||||
|
|
|
@ -367,120 +367,120 @@ def FeatureLVILoadHardening
|
|||
// X86 Subtarget Tuning features
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
|
||||
def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
|
||||
"SHLD instruction is slow">;
|
||||
|
||||
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
|
||||
def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
|
||||
"PMULLD instruction is slow">;
|
||||
|
||||
def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
|
||||
def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
|
||||
"true",
|
||||
"PMADDWD is slower than PMULLD">;
|
||||
|
||||
// FIXME: This should not apply to CPUs that do not have SSE.
|
||||
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
|
||||
def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
|
||||
"IsUAMem16Slow", "true",
|
||||
"Slow unaligned 16-byte memory access">;
|
||||
|
||||
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
|
||||
def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
|
||||
"IsUAMem32Slow", "true",
|
||||
"Slow unaligned 32-byte memory access">;
|
||||
|
||||
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
|
||||
def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
|
||||
"Use LEA for adjusting the stack pointer">;
|
||||
|
||||
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
|
||||
def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
|
||||
"HasSlowDivide32", "true",
|
||||
"Use 8-bit divide for positive values less than 256">;
|
||||
|
||||
def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
|
||||
def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
|
||||
"HasSlowDivide64", "true",
|
||||
"Use 32-bit divide for positive values less than 2^32">;
|
||||
|
||||
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
|
||||
def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
|
||||
"PadShortFunctions", "true",
|
||||
"Pad short functions">;
|
||||
|
||||
// On some processors, instructions that implicitly take two memory operands are
|
||||
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
|
||||
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
|
||||
def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
|
||||
def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
|
||||
"SlowTwoMemOps", "true",
|
||||
"Two memory operand instructions are slow">;
|
||||
|
||||
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
|
||||
def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
|
||||
"LEA instruction needs inputs at AG stage">;
|
||||
|
||||
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
|
||||
def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
|
||||
"LEA instruction with certain arguments is slow">;
|
||||
|
||||
def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
|
||||
def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
|
||||
"LEA instruction with 3 ops or certain registers is slow">;
|
||||
|
||||
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
|
||||
def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
|
||||
"INC and DEC instructions are slower than ADD and SUB">;
|
||||
|
||||
def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
|
||||
def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
|
||||
"HasPOPCNTFalseDeps", "true",
|
||||
"POPCNT has a false dependency on dest register">;
|
||||
|
||||
def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
|
||||
def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
|
||||
"HasLZCNTFalseDeps", "true",
|
||||
"LZCNT/TZCNT have a false dependency on dest register">;
|
||||
|
||||
// On recent X86 (port bound) processors, it's preferable to combine to a single shuffle
|
||||
// using a variable mask over multiple fixed shuffles.
|
||||
def FeatureFastVariableCrossLaneShuffle
|
||||
def TuningFastVariableCrossLaneShuffle
|
||||
: SubtargetFeature<"fast-variable-crosslane-shuffle",
|
||||
"HasFastVariableCrossLaneShuffle",
|
||||
"true", "Cross-lane shuffles with variable masks are fast">;
|
||||
def FeatureFastVariablePerLaneShuffle
|
||||
def TuningFastVariablePerLaneShuffle
|
||||
: SubtargetFeature<"fast-variable-perlane-shuffle",
|
||||
"HasFastVariablePerLaneShuffle",
|
||||
"true", "Per-lane shuffles with variable masks are fast">;
|
||||
|
||||
// On some X86 processors, a vzeroupper instruction should be inserted after
|
||||
// using ymm/zmm registers before executing code that may use SSE instructions.
|
||||
def FeatureInsertVZEROUPPER
|
||||
def TuningInsertVZEROUPPER
|
||||
: SubtargetFeature<"vzeroupper",
|
||||
"InsertVZEROUPPER",
|
||||
"true", "Should insert vzeroupper instructions">;
|
||||
|
||||
// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
|
||||
// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
|
||||
// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
|
||||
// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
|
||||
// vector FSQRT has higher throughput than the corresponding NR code.
|
||||
// The idea is that throughput bound code is likely to be vectorized, so for
|
||||
// vectorized code we should care about the throughput of SQRT operations.
|
||||
// But if the code is scalar that probably means that the code has some kind of
|
||||
// dependency and we should care more about reducing the latency.
|
||||
def FeatureFastScalarFSQRT
|
||||
def TuningFastScalarFSQRT
|
||||
: SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
|
||||
"true", "Scalar SQRT is fast (disable Newton-Raphson)">;
|
||||
def FeatureFastVectorFSQRT
|
||||
def TuningFastVectorFSQRT
|
||||
: SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
|
||||
"true", "Vector SQRT is fast (disable Newton-Raphson)">;
|
||||
|
||||
// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
|
||||
// be used to replace test/set sequences.
|
||||
def FeatureFastLZCNT
|
||||
def TuningFastLZCNT
|
||||
: SubtargetFeature<
|
||||
"fast-lzcnt", "HasFastLZCNT", "true",
|
||||
"LZCNT instructions are as fast as most simple integer ops">;
|
||||
|
||||
// If the target can efficiently decode NOPs up to 7-bytes in length.
|
||||
def FeatureFast7ByteNOP
|
||||
def TuningFast7ByteNOP
|
||||
: SubtargetFeature<
|
||||
"fast-7bytenop", "HasFast7ByteNOP", "true",
|
||||
"Target can quickly decode up to 7 byte NOPs">;
|
||||
|
||||
// If the target can efficiently decode NOPs up to 11-bytes in length.
|
||||
def FeatureFast11ByteNOP
|
||||
def TuningFast11ByteNOP
|
||||
: SubtargetFeature<
|
||||
"fast-11bytenop", "HasFast11ByteNOP", "true",
|
||||
"Target can quickly decode up to 11 byte NOPs">;
|
||||
|
||||
// If the target can efficiently decode NOPs up to 15-bytes in length.
|
||||
def FeatureFast15ByteNOP
|
||||
def TuningFast15ByteNOP
|
||||
: SubtargetFeature<
|
||||
"fast-15bytenop", "HasFast15ByteNOP", "true",
|
||||
"Target can quickly decode up to 15 byte NOPs">;
|
||||
|
@ -488,21 +488,21 @@ def FeatureFast15ByteNOP
|
|||
// Sandy Bridge and newer processors can use SHLD with the same source on both
|
||||
// inputs to implement rotate to avoid the partial flag update of the normal
|
||||
// rotate instructions.
|
||||
def FeatureFastSHLDRotate
|
||||
def TuningFastSHLDRotate
|
||||
: SubtargetFeature<
|
||||
"fast-shld-rotate", "HasFastSHLDRotate", "true",
|
||||
"SHLD can be used as a faster rotate">;
|
||||
|
||||
// Bulldozer and newer processors can merge CMP/TEST (but not other
|
||||
// instructions) with conditional branches.
|
||||
def FeatureBranchFusion
|
||||
def TuningBranchFusion
|
||||
: SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
|
||||
"CMP/TEST can be fused with conditional branches">;
|
||||
|
||||
// Sandy Bridge and newer processors have many instructions that can be
|
||||
// fused with conditional branches and pass through the CPU as a single
|
||||
// operation.
|
||||
def FeatureMacroFusion
|
||||
def TuningMacroFusion
|
||||
: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
|
||||
"Various instructions can be fused with conditional branches">;
|
||||
|
||||
|
@ -510,50 +510,50 @@ def FeatureMacroFusion
|
|||
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
|
||||
// Skylake Client processor has faster Gathers than HSW and performance is
|
||||
// similar to Skylake Server (AVX-512).
|
||||
def FeatureFastGather
|
||||
def TuningFastGather
|
||||
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
|
||||
"Indicates if gather is reasonably fast">;
|
||||
|
||||
def FeaturePrefer128Bit
|
||||
def TuningPrefer128Bit
|
||||
: SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
|
||||
"Prefer 128-bit AVX instructions">;
|
||||
|
||||
def FeaturePrefer256Bit
|
||||
def TuningPrefer256Bit
|
||||
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
|
||||
"Prefer 256-bit AVX instructions">;
|
||||
|
||||
def FeaturePreferMaskRegisters
|
||||
def TuningPreferMaskRegisters
|
||||
: SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
|
||||
"Prefer AVX512 mask registers over PTEST/MOVMSK">;
|
||||
|
||||
def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
|
||||
def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
|
||||
"Indicates that the BEXTR instruction is implemented as a single uop "
|
||||
"with good throughput">;
|
||||
|
||||
// Combine vector math operations with shuffles into horizontal math
|
||||
// instructions if a CPU implements horizontal operations (introduced with
|
||||
// SSE3) with better latency/throughput than the alternative sequence.
|
||||
def FeatureFastHorizontalOps
|
||||
def TuningFastHorizontalOps
|
||||
: SubtargetFeature<
|
||||
"fast-hops", "HasFastHorizontalOps", "true",
|
||||
"Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
|
||||
"normal vector instructions with shuffles">;
|
||||
|
||||
def FeatureFastScalarShiftMasks
|
||||
def TuningFastScalarShiftMasks
|
||||
: SubtargetFeature<
|
||||
"fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
|
||||
"Prefer a left/right scalar logical shift pair over a shift+and pair">;
|
||||
|
||||
def FeatureFastVectorShiftMasks
|
||||
def TuningFastVectorShiftMasks
|
||||
: SubtargetFeature<
|
||||
"fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
|
||||
"Prefer a left/right vector logical shift pair over a shift+and pair">;
|
||||
|
||||
def FeatureFastMOVBE
|
||||
def TuningFastMOVBE
|
||||
: SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
|
||||
"Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
|
||||
|
||||
def FeatureUseGLMDivSqrtCosts
|
||||
def TuningUseGLMDivSqrtCosts
|
||||
: SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
|
||||
"Use Goldmont specific floating point div/sqrt costs">;
|
||||
|
||||
|
@ -631,8 +631,8 @@ def ProcessorFeatures {
|
|||
|
||||
// Nehalem
|
||||
list<SubtargetFeature> NHMFeatures = X86_64V2Features;
|
||||
list<SubtargetFeature> NHMTuning = [FeatureMacroFusion,
|
||||
FeatureInsertVZEROUPPER];
|
||||
list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
|
||||
TuningInsertVZEROUPPER];
|
||||
|
||||
// Westmere
|
||||
list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
|
||||
|
@ -644,15 +644,15 @@ def ProcessorFeatures {
|
|||
list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
|
||||
FeatureXSAVE,
|
||||
FeatureXSAVEOPT];
|
||||
list<SubtargetFeature> SNBTuning = [FeatureMacroFusion,
|
||||
FeatureSlow3OpsLEA,
|
||||
FeatureSlowDivide64,
|
||||
FeatureSlowUAMem32,
|
||||
FeatureFastScalarFSQRT,
|
||||
FeatureFastSHLDRotate,
|
||||
FeatureFast15ByteNOP,
|
||||
FeaturePOPCNTFalseDeps,
|
||||
FeatureInsertVZEROUPPER];
|
||||
list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
|
||||
TuningSlow3OpsLEA,
|
||||
TuningSlowDivide64,
|
||||
TuningSlowUAMem32,
|
||||
TuningFastScalarFSQRT,
|
||||
TuningFastSHLDRotate,
|
||||
TuningFast15ByteNOP,
|
||||
TuningPOPCNTFalseDeps,
|
||||
TuningInsertVZEROUPPER];
|
||||
list<SubtargetFeature> SNBFeatures =
|
||||
!listconcat(WSMFeatures, SNBAdditionalFeatures);
|
||||
|
||||
|
@ -673,17 +673,17 @@ def ProcessorFeatures {
|
|||
FeatureINVPCID,
|
||||
FeatureLZCNT,
|
||||
FeatureMOVBE];
|
||||
list<SubtargetFeature> HSWTuning = [FeatureMacroFusion,
|
||||
FeatureSlow3OpsLEA,
|
||||
FeatureSlowDivide64,
|
||||
FeatureFastScalarFSQRT,
|
||||
FeatureFastSHLDRotate,
|
||||
FeatureFast15ByteNOP,
|
||||
FeatureFastVariableCrossLaneShuffle,
|
||||
FeatureFastVariablePerLaneShuffle,
|
||||
FeaturePOPCNTFalseDeps,
|
||||
FeatureLZCNTFalseDeps,
|
||||
FeatureInsertVZEROUPPER];
|
||||
list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
|
||||
TuningSlow3OpsLEA,
|
||||
TuningSlowDivide64,
|
||||
TuningFastScalarFSQRT,
|
||||
TuningFastSHLDRotate,
|
||||
TuningFast15ByteNOP,
|
||||
TuningFastVariableCrossLaneShuffle,
|
||||
TuningFastVariablePerLaneShuffle,
|
||||
TuningPOPCNTFalseDeps,
|
||||
TuningLZCNTFalseDeps,
|
||||
TuningInsertVZEROUPPER];
|
||||
list<SubtargetFeature> HSWFeatures =
|
||||
!listconcat(IVBFeatures, HSWAdditionalFeatures);
|
||||
|
||||
|
@ -700,18 +700,18 @@ def ProcessorFeatures {
|
|||
FeatureXSAVEC,
|
||||
FeatureXSAVES,
|
||||
FeatureCLFLUSHOPT];
|
||||
list<SubtargetFeature> SKLTuning = [FeatureFastGather,
|
||||
FeatureMacroFusion,
|
||||
FeatureSlow3OpsLEA,
|
||||
FeatureSlowDivide64,
|
||||
FeatureFastScalarFSQRT,
|
||||
FeatureFastVectorFSQRT,
|
||||
FeatureFastSHLDRotate,
|
||||
FeatureFast15ByteNOP,
|
||||
FeatureFastVariableCrossLaneShuffle,
|
||||
FeatureFastVariablePerLaneShuffle,
|
||||
FeaturePOPCNTFalseDeps,
|
||||
FeatureInsertVZEROUPPER];
|
||||
list<SubtargetFeature> SKLTuning = [TuningFastGather,
|
||||
TuningMacroFusion,
|
||||
TuningSlow3OpsLEA,
|
||||
TuningSlowDivide64,
|
||||
TuningFastScalarFSQRT,
|
||||
TuningFastVectorFSQRT,
|
||||
TuningFastSHLDRotate,
|
||||
TuningFast15ByteNOP,
|
||||
TuningFastVariableCrossLaneShuffle,
|
||||
TuningFastVariablePerLaneShuffle,
|
||||
TuningPOPCNTFalseDeps,
|
||||
TuningInsertVZEROUPPER];
|
||||
list<SubtargetFeature> SKLFeatures =
|
||||
!listconcat(BDWFeatures, SKLAdditionalFeatures);
|
||||
|
||||
|
@ -727,19 +727,19 @@ def ProcessorFeatures {
|
|||
FeatureVLX,
|
||||
FeaturePKU,
|
||||
FeatureCLWB];
|
||||
list<SubtargetFeature> SKXTuning = [FeatureFastGather,
|
||||
FeatureMacroFusion,
|
||||
FeatureSlow3OpsLEA,
|
||||
FeatureSlowDivide64,
|
||||
FeatureFastScalarFSQRT,
|
||||
FeatureFastVectorFSQRT,
|
||||
FeatureFastSHLDRotate,
|
||||
FeatureFast15ByteNOP,
|
||||
FeatureFastVariableCrossLaneShuffle,
|
||||
FeatureFastVariablePerLaneShuffle,
|
||||
FeaturePrefer256Bit,
|
||||
FeaturePOPCNTFalseDeps,
|
||||
FeatureInsertVZEROUPPER];
|
||||
list<SubtargetFeature> SKXTuning = [TuningFastGather,
|
||||
TuningMacroFusion,
|
||||
TuningSlow3OpsLEA,
|
||||
TuningSlowDivide64,
|
||||
TuningFastScalarFSQRT,
|
||||
TuningFastVectorFSQRT,
|
||||
TuningFastSHLDRotate,
|
||||
TuningFast15ByteNOP,
|
||||
TuningFastVariableCrossLaneShuffle,
|
||||
TuningFastVariablePerLaneShuffle,
|
||||
TuningPrefer256Bit,
|
||||
TuningPOPCNTFalseDeps,
|
||||
TuningInsertVZEROUPPER];
|
||||
list<SubtargetFeature> SKXFeatures =
|
||||
!listconcat(BDWFeatures, SKXAdditionalFeatures);
|
||||
|
||||
|
@ -765,18 +765,18 @@ def ProcessorFeatures {
|
|||
FeatureVBMI,
|
||||
FeatureIFMA,
|
||||
FeatureSHA];
|
||||
list<SubtargetFeature> CNLTuning = [FeatureFastGather,
|
||||
FeatureMacroFusion,
|
||||
FeatureSlow3OpsLEA,
|
||||
FeatureSlowDivide64,
|
||||
FeatureFastScalarFSQRT,
|
||||
FeatureFastVectorFSQRT,
|
||||
FeatureFastSHLDRotate,
|
||||
FeatureFast15ByteNOP,
|
||||
FeatureFastVariableCrossLaneShuffle,
|
||||
FeatureFastVariablePerLaneShuffle,
|
||||
FeaturePrefer256Bit,
|
||||
FeatureInsertVZEROUPPER];
|
||||
list<SubtargetFeature> CNLTuning = [TuningFastGather,
|
||||
TuningMacroFusion,
|
||||
TuningSlow3OpsLEA,
|
||||
TuningSlowDivide64,
|
||||
TuningFastScalarFSQRT,
|
||||
TuningFastVectorFSQRT,
|
||||
TuningFastSHLDRotate,
|
||||
TuningFast15ByteNOP,
|
||||
TuningFastVariableCrossLaneShuffle,
|
||||
TuningFastVariablePerLaneShuffle,
|
||||
TuningPrefer256Bit,
|
||||
TuningInsertVZEROUPPER];
|
||||
list<SubtargetFeature> CNLFeatures =
|
||||
!listconcat(SKLFeatures, CNLAdditionalFeatures);
|
||||
|
||||
|
@ -846,14 +846,14 @@ def ProcessorFeatures {
|
|||
FeatureMOVBE,
|
||||
FeatureLAHFSAHF];
|
||||
list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
|
||||
FeatureSlowUAMem16,
|
||||
FeatureLEAForSP,
|
||||
FeatureSlowDivide32,
|
||||
FeatureSlowDivide64,
|
||||
FeatureSlowTwoMemOps,
|
||||
FeatureLEAUsesAG,
|
||||
FeaturePadShortFunctions,
|
||||
FeatureInsertVZEROUPPER];
|
||||
TuningSlowUAMem16,
|
||||
TuningLEAForSP,
|
||||
TuningSlowDivide32,
|
||||
TuningSlowDivide64,
|
||||
TuningSlowTwoMemOps,
|
||||
TuningLEAUsesAG,
|
||||
TuningPadShortFunctions,
|
||||
TuningInsertVZEROUPPER];
|
||||
|
||||
// Silvermont
|
||||
list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
|
||||
|
@ -862,15 +862,15 @@ def ProcessorFeatures {
|
|||
FeaturePRFCHW,
|
||||
FeatureRDRAND];
|
||||
list<SubtargetFeature> SLMTuning = [ProcIntelSLM,
|
||||
FeatureSlowTwoMemOps,
|
||||
FeatureSlowLEA,
|
||||
FeatureSlowIncDec,
|
||||
FeatureSlowDivide64,
|
||||
FeatureSlowPMULLD,
|
||||
FeatureFast7ByteNOP,
|
||||
FeatureFastMOVBE,
|
||||
FeaturePOPCNTFalseDeps,
|
||||
FeatureInsertVZEROUPPER];
|
||||
TuningSlowTwoMemOps,
|
||||
TuningSlowLEA,
|
||||
TuningSlowIncDec,
|
||||
TuningSlowDivide64,
|
||||
TuningSlowPMULLD,
|
||||
TuningFast7ByteNOP,
|
||||
TuningFastMOVBE,
|
||||
TuningPOPCNTFalseDeps,
|
||||
TuningInsertVZEROUPPER];
|
||||
list<SubtargetFeature> SLMFeatures =
|
||||
!listconcat(AtomFeatures, SLMAdditionalFeatures);
|
||||
|
||||
|
@ -884,25 +884,25 @@ def ProcessorFeatures {
|
|||
FeatureXSAVES,
|
||||
FeatureCLFLUSHOPT,
|
||||
FeatureFSGSBase];
|
||||
list<SubtargetFeature> GLMTuning = [FeatureUseGLMDivSqrtCosts,
|
||||
FeatureSlowTwoMemOps,
|
||||
FeatureSlowLEA,
|
||||
FeatureSlowIncDec,
|
||||
FeatureFastMOVBE,
|
||||
FeaturePOPCNTFalseDeps,
|
||||
FeatureInsertVZEROUPPER];
|
||||
list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
|
||||
TuningSlowTwoMemOps,
|
||||
TuningSlowLEA,
|
||||
TuningSlowIncDec,
|
||||
TuningFastMOVBE,
|
||||
TuningPOPCNTFalseDeps,
|
||||
TuningInsertVZEROUPPER];
|
||||
list<SubtargetFeature> GLMFeatures =
|
||||
!listconcat(SLMFeatures, GLMAdditionalFeatures);
|
||||
|
||||
// Goldmont Plus
|
||||
list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
|
||||
FeatureRDPID];
|
||||
list<SubtargetFeature> GLPTuning = [FeatureUseGLMDivSqrtCosts,
|
||||
FeatureSlowTwoMemOps,
|
||||
FeatureSlowLEA,
|
||||
FeatureSlowIncDec,
|
||||
FeatureFastMOVBE,
|
||||
FeatureInsertVZEROUPPER];
|
||||
list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
|
||||
TuningSlowTwoMemOps,
|
||||
TuningSlowLEA,
|
||||
TuningSlowIncDec,
|
||||
TuningFastMOVBE,
|
||||
TuningInsertVZEROUPPER];
|
||||
list<SubtargetFeature> GLPFeatures =
|
||||
!listconcat(GLMFeatures, GLPAdditionalFeatures);
|
||||
|
||||
|
@ -969,14 +969,14 @@ def ProcessorFeatures {
|
|||
FeatureBMI2,
|
||||
FeatureFMA,
|
||||
FeaturePRFCHW];
|
||||
list<SubtargetFeature> KNLTuning = [FeatureSlowDivide64,
|
||||
FeatureSlow3OpsLEA,
|
||||
FeatureSlowIncDec,
|
||||
FeatureSlowTwoMemOps,
|
||||
FeaturePreferMaskRegisters,
|
||||
FeatureFastGather,
|
||||
FeatureFastMOVBE,
|
||||
FeatureSlowPMADDWD];
|
||||
list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
|
||||
TuningSlow3OpsLEA,
|
||||
TuningSlowIncDec,
|
||||
TuningSlowTwoMemOps,
|
||||
TuningPreferMaskRegisters,
|
||||
TuningFastGather,
|
||||
TuningFastMOVBE,
|
||||
TuningSlowPMADDWD];
|
||||
// TODO Add AVX5124FMAPS/AVX5124VNNIW features
|
||||
list<SubtargetFeature> KNMFeatures =
|
||||
!listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
|
||||
|
@ -995,9 +995,9 @@ def ProcessorFeatures {
|
|||
FeatureLAHFSAHF,
|
||||
FeatureCMOV,
|
||||
Feature64Bit];
|
||||
list<SubtargetFeature> BarcelonaTuning = [FeatureFastScalarShiftMasks,
|
||||
FeatureSlowSHLD,
|
||||
FeatureInsertVZEROUPPER];
|
||||
list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
|
||||
TuningSlowSHLD,
|
||||
TuningInsertVZEROUPPER];
|
||||
|
||||
// Bobcat
|
||||
list<SubtargetFeature> BtVer1Features = [FeatureX87,
|
||||
|
@ -1014,11 +1014,11 @@ def ProcessorFeatures {
|
|||
FeatureLZCNT,
|
||||
FeaturePOPCNT,
|
||||
FeatureLAHFSAHF];
|
||||
list<SubtargetFeature> BtVer1Tuning = [FeatureFast15ByteNOP,
|
||||
FeatureFastScalarShiftMasks,
|
||||
FeatureFastVectorShiftMasks,
|
||||
FeatureSlowSHLD,
|
||||
FeatureInsertVZEROUPPER];
|
||||
list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
|
||||
TuningFastScalarShiftMasks,
|
||||
TuningFastVectorShiftMasks,
|
||||
TuningSlowSHLD,
|
||||
TuningInsertVZEROUPPER];
|
||||
|
||||
// Jaguar
|
||||
list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
|
||||
|
@ -1029,14 +1029,14 @@ def ProcessorFeatures {
|
|||
FeatureMOVBE,
|
||||
FeatureXSAVE,
|
||||
FeatureXSAVEOPT];
|
||||
list<SubtargetFeature> BtVer2Tuning = [FeatureFastLZCNT,
|
||||
FeatureFastBEXTR,
|
||||
FeatureFastHorizontalOps,
|
||||
FeatureFast15ByteNOP,
|
||||
FeatureFastScalarShiftMasks,
|
||||
FeatureFastVectorShiftMasks,
|
||||
FeatureFastMOVBE,
|
||||
FeatureSlowSHLD];
|
||||
list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
|
||||
TuningFastBEXTR,
|
||||
TuningFastHorizontalOps,
|
||||
TuningFast15ByteNOP,
|
||||
TuningFastScalarShiftMasks,
|
||||
TuningFastVectorShiftMasks,
|
||||
TuningFastMOVBE,
|
||||
TuningSlowSHLD];
|
||||
list<SubtargetFeature> BtVer2Features =
|
||||
!listconcat(BtVer1Features, BtVer2AdditionalFeatures);
|
||||
|
||||
|
@ -1058,19 +1058,19 @@ def ProcessorFeatures {
|
|||
FeatureXSAVE,
|
||||
FeatureLWP,
|
||||
FeatureLAHFSAHF];
|
||||
list<SubtargetFeature> BdVer1Tuning = [FeatureSlowSHLD,
|
||||
FeatureFast11ByteNOP,
|
||||
FeatureFastScalarShiftMasks,
|
||||
FeatureBranchFusion,
|
||||
FeatureInsertVZEROUPPER];
|
||||
list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
|
||||
TuningFast11ByteNOP,
|
||||
TuningFastScalarShiftMasks,
|
||||
TuningBranchFusion,
|
||||
TuningInsertVZEROUPPER];
|
||||
|
||||
// PileDriver
|
||||
list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
|
||||
FeatureBMI,
|
||||
FeatureTBM,
|
||||
FeatureFMA];
|
||||
list<SubtargetFeature> BdVer2AdditionalTuning = [FeatureFastBEXTR,
|
||||
FeatureFastMOVBE];
|
||||
list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
|
||||
TuningFastMOVBE];
|
||||
list<SubtargetFeature> BdVer2Tuning =
|
||||
!listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
|
||||
list<SubtargetFeature> BdVer2Features =
|
||||
|
@ -1127,14 +1127,14 @@ def ProcessorFeatures {
|
|||
FeatureXSAVEC,
|
||||
FeatureXSAVEOPT,
|
||||
FeatureXSAVES];
|
||||
list<SubtargetFeature> ZNTuning = [FeatureFastLZCNT,
|
||||
FeatureFastBEXTR,
|
||||
FeatureFast15ByteNOP,
|
||||
FeatureBranchFusion,
|
||||
FeatureFastScalarShiftMasks,
|
||||
FeatureFastMOVBE,
|
||||
FeatureSlowSHLD,
|
||||
FeatureInsertVZEROUPPER];
|
||||
list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
|
||||
TuningFastBEXTR,
|
||||
TuningFast15ByteNOP,
|
||||
TuningBranchFusion,
|
||||
TuningFastScalarShiftMasks,
|
||||
TuningFastMOVBE,
|
||||
TuningSlowSHLD,
|
||||
TuningInsertVZEROUPPER];
|
||||
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
|
||||
FeatureRDPID,
|
||||
FeatureWBNOINVD];
|
||||
|
@ -1147,8 +1147,8 @@ def ProcessorFeatures {
|
|||
FeatureVAES,
|
||||
FeatureVPCLMULQDQ];
|
||||
list<SubtargetFeature> ZN3AdditionalTuning =
|
||||
[FeatureMacroFusion,
|
||||
FeatureFastVariablePerLaneShuffle];
|
||||
[TuningMacroFusion,
|
||||
TuningFastVariablePerLaneShuffle];
|
||||
list<SubtargetFeature> ZN3Tuning =
|
||||
!listconcat(ZNTuning, ZN3AdditionalTuning);
|
||||
list<SubtargetFeature> ZN3Features =
|
||||
|
@ -1175,37 +1175,37 @@ class ProcModel<string Name, SchedMachineModel Model,
|
|||
// It has no effect on code generation.
|
||||
def : ProcModel<"generic", SandyBridgeModel,
|
||||
[FeatureX87, FeatureCMPXCHG8B, Feature64Bit],
|
||||
[FeatureSlow3OpsLEA,
|
||||
FeatureSlowDivide64,
|
||||
FeatureSlowIncDec,
|
||||
FeatureMacroFusion,
|
||||
FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlow3OpsLEA,
|
||||
TuningSlowDivide64,
|
||||
TuningSlowIncDec,
|
||||
TuningMacroFusion,
|
||||
TuningInsertVZEROUPPER]>;
|
||||
|
||||
def : Proc<"i386", [FeatureX87],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
def : Proc<"i486", [FeatureX87],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
def : Proc<"i586", [FeatureX87, FeatureCMPXCHG8B],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
def : Proc<"pentium", [FeatureX87, FeatureCMPXCHG8B],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
def : Proc<"pentium-mmx", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
|
||||
def : Proc<"i686", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
def : Proc<"pentiumpro", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
|
||||
FeatureNOPL],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
|
||||
def : Proc<"pentium2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV,
|
||||
FeatureFXSR, FeatureNOPL],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
|
||||
foreach P = ["pentium3", "pentium3m"] in {
|
||||
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
|
||||
FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
}
|
||||
|
||||
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
|
||||
|
@ -1221,30 +1221,30 @@ foreach P = ["pentium3", "pentium3m"] in {
|
|||
def : ProcModel<"pentium-m", GenericPostRAModel,
|
||||
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
|
||||
FeatureFXSR, FeatureNOPL, FeatureCMOV],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
|
||||
foreach P = ["pentium4", "pentium4m"] in {
|
||||
def : ProcModel<P, GenericPostRAModel,
|
||||
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
|
||||
FeatureFXSR, FeatureNOPL, FeatureCMOV],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
}
|
||||
|
||||
// Intel Quark.
|
||||
def : Proc<"lakemont", [FeatureCMPXCHG8B],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
|
||||
// Intel Core Duo.
|
||||
def : ProcModel<"yonah", SandyBridgeModel,
|
||||
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
|
||||
FeatureFXSR, FeatureNOPL, FeatureCMOV],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
|
||||
// NetBurst.
|
||||
def : ProcModel<"prescott", GenericPostRAModel,
|
||||
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
|
||||
FeatureFXSR, FeatureNOPL, FeatureCMOV],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
def : ProcModel<"nocona", GenericPostRAModel, [
|
||||
FeatureX87,
|
||||
FeatureCMPXCHG8B,
|
||||
|
@ -1257,8 +1257,8 @@ def : ProcModel<"nocona", GenericPostRAModel, [
|
|||
FeatureCMPXCHG16B,
|
||||
],
|
||||
[
|
||||
FeatureSlowUAMem16,
|
||||
FeatureInsertVZEROUPPER
|
||||
TuningSlowUAMem16,
|
||||
TuningInsertVZEROUPPER
|
||||
]>;
|
||||
|
||||
// Intel Core 2 Solo/Duo.
|
||||
|
@ -1275,9 +1275,9 @@ def : ProcModel<"core2", SandyBridgeModel, [
|
|||
FeatureLAHFSAHF
|
||||
],
|
||||
[
|
||||
FeatureMacroFusion,
|
||||
FeatureSlowUAMem16,
|
||||
FeatureInsertVZEROUPPER
|
||||
TuningMacroFusion,
|
||||
TuningSlowUAMem16,
|
||||
TuningInsertVZEROUPPER
|
||||
]>;
|
||||
def : ProcModel<"penryn", SandyBridgeModel, [
|
||||
FeatureX87,
|
||||
|
@ -1292,9 +1292,9 @@ def : ProcModel<"penryn", SandyBridgeModel, [
|
|||
FeatureLAHFSAHF
|
||||
],
|
||||
[
|
||||
FeatureMacroFusion,
|
||||
FeatureSlowUAMem16,
|
||||
FeatureInsertVZEROUPPER
|
||||
TuningMacroFusion,
|
||||
TuningSlowUAMem16,
|
||||
TuningInsertVZEROUPPER
|
||||
]>;
|
||||
|
||||
// Atom CPUs.
|
||||
|
@ -1379,37 +1379,37 @@ def : ProcModel<"alderlake", SkylakeClientModel,
|
|||
// AMD CPUs.
|
||||
|
||||
def : Proc<"k6", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
def : Proc<"k6-2", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
def : Proc<"k6-3", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
|
||||
foreach P = ["athlon", "athlon-tbird"] in {
|
||||
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, Feature3DNowA,
|
||||
FeatureNOPL],
|
||||
[FeatureSlowSHLD, FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
}
|
||||
|
||||
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
|
||||
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
|
||||
FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL],
|
||||
[FeatureSlowSHLD, FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
}
|
||||
|
||||
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
|
||||
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE2, Feature3DNowA,
|
||||
FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMOV],
|
||||
[FeatureFastScalarShiftMasks, FeatureSlowSHLD, FeatureSlowUAMem16,
|
||||
FeatureInsertVZEROUPPER]>;
|
||||
[TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
|
||||
TuningInsertVZEROUPPER]>;
|
||||
}
|
||||
|
||||
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
|
||||
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE3, Feature3DNowA,
|
||||
FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV,
|
||||
Feature64Bit],
|
||||
[FeatureFastScalarShiftMasks, FeatureSlowSHLD, FeatureSlowUAMem16,
|
||||
FeatureInsertVZEROUPPER]>;
|
||||
[TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
|
||||
TuningInsertVZEROUPPER]>;
|
||||
}
|
||||
|
||||
foreach P = ["amdfam10", "barcelona"] in {
|
||||
|
@ -1445,17 +1445,17 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
|
|||
ProcessorFeatures.ZN3Tuning>;
|
||||
|
||||
def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
|
||||
def : Proc<"winchip-c6", [FeatureX87, FeatureMMX],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
def : Proc<"winchip2", [FeatureX87, Feature3DNow],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
def : Proc<"c3", [FeatureX87, Feature3DNow],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
|
||||
FeatureSSE1, FeatureFXSR, FeatureCMOV],
|
||||
[FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
|
||||
// We also provide a generic 64-bit specific x86 processor model which tries to
|
||||
// be good for modern chips without enabling instruction set encodings past the
|
||||
|
@ -1469,11 +1469,11 @@ def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
|
|||
// forming a common base for them.
|
||||
def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
|
||||
[
|
||||
FeatureSlow3OpsLEA,
|
||||
FeatureSlowDivide64,
|
||||
FeatureSlowIncDec,
|
||||
FeatureMacroFusion,
|
||||
FeatureInsertVZEROUPPER
|
||||
TuningSlow3OpsLEA,
|
||||
TuningSlowDivide64,
|
||||
TuningSlowIncDec,
|
||||
TuningMacroFusion,
|
||||
TuningInsertVZEROUPPER
|
||||
]>;
|
||||
|
||||
// x86-64 micro-architecture levels.
|
||||
|
|
|
@ -1094,11 +1094,11 @@ static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
|
|||
if (Subtarget->is64Bit()) {
|
||||
// FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
|
||||
// IndexReg/BaseReg below need to be updated.
|
||||
if (Subtarget->hasFeature(X86::FeatureFast7ByteNOP))
|
||||
if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
|
||||
MaxNopLength = 7;
|
||||
else if (Subtarget->hasFeature(X86::FeatureFast15ByteNOP))
|
||||
else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
|
||||
MaxNopLength = 15;
|
||||
else if (Subtarget->hasFeature(X86::FeatureFast11ByteNOP))
|
||||
else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
|
||||
MaxNopLength = 11;
|
||||
else
|
||||
MaxNopLength = 10;
|
||||
|
|
|
@ -45,48 +45,50 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
|
|||
X86::FeatureCMPXCHG16B,
|
||||
X86::FeatureLAHFSAHF,
|
||||
|
||||
// Codegen control options.
|
||||
X86::FeatureFast11ByteNOP,
|
||||
X86::FeatureFast15ByteNOP,
|
||||
X86::FeatureFastBEXTR,
|
||||
X86::FeatureFastHorizontalOps,
|
||||
X86::FeatureFastLZCNT,
|
||||
X86::FeatureFastScalarFSQRT,
|
||||
X86::FeatureFastSHLDRotate,
|
||||
X86::FeatureFastScalarShiftMasks,
|
||||
X86::FeatureFastVectorShiftMasks,
|
||||
X86::FeatureFastVariableCrossLaneShuffle,
|
||||
X86::FeatureFastVariablePerLaneShuffle,
|
||||
X86::FeatureFastVectorFSQRT,
|
||||
X86::FeatureLEAForSP,
|
||||
X86::FeatureLEAUsesAG,
|
||||
X86::FeatureLZCNTFalseDeps,
|
||||
X86::FeatureBranchFusion,
|
||||
X86::FeatureMacroFusion,
|
||||
X86::FeaturePadShortFunctions,
|
||||
X86::FeaturePOPCNTFalseDeps,
|
||||
// Some older targets can be setup to fold unaligned loads.
|
||||
X86::FeatureSSEUnalignedMem,
|
||||
X86::FeatureSlow3OpsLEA,
|
||||
X86::FeatureSlowDivide32,
|
||||
X86::FeatureSlowDivide64,
|
||||
X86::FeatureSlowIncDec,
|
||||
X86::FeatureSlowLEA,
|
||||
X86::FeatureSlowPMADDWD,
|
||||
X86::FeatureSlowPMULLD,
|
||||
X86::FeatureSlowSHLD,
|
||||
X86::FeatureSlowTwoMemOps,
|
||||
X86::FeatureSlowUAMem16,
|
||||
X86::FeaturePreferMaskRegisters,
|
||||
X86::FeatureInsertVZEROUPPER,
|
||||
X86::FeatureUseGLMDivSqrtCosts,
|
||||
|
||||
// Codegen control options.
|
||||
X86::TuningFast11ByteNOP,
|
||||
X86::TuningFast15ByteNOP,
|
||||
X86::TuningFastBEXTR,
|
||||
X86::TuningFastHorizontalOps,
|
||||
X86::TuningFastLZCNT,
|
||||
X86::TuningFastScalarFSQRT,
|
||||
X86::TuningFastSHLDRotate,
|
||||
X86::TuningFastScalarShiftMasks,
|
||||
X86::TuningFastVectorShiftMasks,
|
||||
X86::TuningFastVariableCrossLaneShuffle,
|
||||
X86::TuningFastVariablePerLaneShuffle,
|
||||
X86::TuningFastVectorFSQRT,
|
||||
X86::TuningLEAForSP,
|
||||
X86::TuningLEAUsesAG,
|
||||
X86::TuningLZCNTFalseDeps,
|
||||
X86::TuningBranchFusion,
|
||||
X86::TuningMacroFusion,
|
||||
X86::TuningPadShortFunctions,
|
||||
X86::TuningPOPCNTFalseDeps,
|
||||
X86::TuningSlow3OpsLEA,
|
||||
X86::TuningSlowDivide32,
|
||||
X86::TuningSlowDivide64,
|
||||
X86::TuningSlowIncDec,
|
||||
X86::TuningSlowLEA,
|
||||
X86::TuningSlowPMADDWD,
|
||||
X86::TuningSlowPMULLD,
|
||||
X86::TuningSlowSHLD,
|
||||
X86::TuningSlowTwoMemOps,
|
||||
X86::TuningSlowUAMem16,
|
||||
X86::TuningPreferMaskRegisters,
|
||||
X86::TuningInsertVZEROUPPER,
|
||||
X86::TuningUseGLMDivSqrtCosts,
|
||||
|
||||
// Perf-tuning flags.
|
||||
X86::FeatureFastGather,
|
||||
X86::FeatureSlowUAMem32,
|
||||
X86::TuningFastGather,
|
||||
X86::TuningSlowUAMem32,
|
||||
|
||||
// Based on whether user set the -mprefer-vector-width command line.
|
||||
X86::FeaturePrefer128Bit,
|
||||
X86::FeaturePrefer256Bit,
|
||||
X86::TuningPrefer128Bit,
|
||||
X86::TuningPrefer256Bit,
|
||||
|
||||
// CPU name enums. These just follow CPU string.
|
||||
X86::ProcIntelAtom,
|
||||
|
|
Loading…
Reference in New Issue