From 0c7af8c83bd1acb0ca78f35ddde29b6fde4363a0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 4 Aug 2020 17:50:06 -0700 Subject: [PATCH] [X86] Optimize getImpliedDisabledFeatures & getImpliedEnabledFeatures after D83273 Previously the time complexity was O(|number of paths from the root to an implied feature| * CPU_FEATURE_MAX) where CPU_FEATURE_MAX is 92. The number of paths can be large (theoretically exponential). For an inline asm statement, there is a code path `clang::Parser::ParseAsmStatement -> clang::Sema::ActOnGCCAsmStmt -> ASTContext::getFunctionFeatureMap` leading to potentially many calls of getImpliedEnabledFeatures (41 for my -march=native case). We should improve the performance a bit in case the number of inline asm statements is large (Linux kernel builds). Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D85257 --- llvm/lib/Support/X86TargetParser.cpp | 39 +++++++++++++++++++++------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp index 572d1203aaf2..c629f872df12 100644 --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -37,6 +37,10 @@ public: set(I); } + bool any() const { + return llvm::any_of(Bits, [](uint64_t V) { return V != 0; }); + } + constexpr FeatureBitset &set(unsigned I) { // GCC <6.2 crashes if this is written in a single statement. uint32_t NewBits = Bits[I / 32] | (uint32_t(1) << (I % 32)); @@ -89,6 +93,13 @@ public: Result.Bits[I] = ~Bits[I]; return Result; } + + constexpr bool operator!=(const FeatureBitset &RHS) const { + for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) + if (Bits[I] != RHS.Bits[I]) + return true; + return false; + } }; struct ProcInfo { @@ -552,11 +563,17 @@ void llvm::X86::getFeaturesForCPU(StringRef CPU, // For each feature that is (transitively) implied by this feature, set it. 
static void getImpliedEnabledFeatures(FeatureBitset &Bits, const FeatureBitset &Implies) { + // Fast path: Implies is often empty. + if (!Implies.any()) + return; + FeatureBitset Prev; Bits |= Implies; - for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) { - if (Implies[i]) - getImpliedEnabledFeatures(Bits, FeatureInfos[i].ImpliedFeatures); - } + do { + Prev = Bits; + for (unsigned i = CPU_FEATURE_MAX; i;) + if (Bits[--i]) + Bits |= FeatureInfos[i].ImpliedFeatures; + } while (Prev != Bits); } /// Create bit vector of features that are implied disabled if the feature @@ -564,12 +581,14 @@ static void getImpliedEnabledFeatures(FeatureBitset &Bits, static void getImpliedDisabledFeatures(FeatureBitset &Bits, unsigned Value) { // Check all features looking for any dependent on this feature. If we find // one, mark it and recursively find any feature that depend on it. - for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) { - if (FeatureInfos[i].ImpliedFeatures[Value]) { - Bits.set(i); - getImpliedDisabledFeatures(Bits, i); - } - } + FeatureBitset Prev; + Bits.set(Value); + do { + Prev = Bits; + for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) + if ((FeatureInfos[i].ImpliedFeatures & Bits).any()) + Bits.set(i); + } while (Prev != Bits); } void llvm::X86::getImpliedFeatures(