From b0b32e649011d9a60165b9b53eb2764b7da9c8ca Mon Sep 17 00:00:00 2001 From: sstefan1 Date: Wed, 12 Aug 2020 12:20:53 +0200 Subject: [PATCH] [OpenMPOpt] ICV tracking for calls Introduce two new AAs. AAICVTrackerFunctionReturned which checks if a function can have a unique ICV value after it is finished, and AAICVTrackerCallSiteReturned which checks AAICVTrackerFunctionReturned for a call site. This enables us to check the value of a call and if it changes the ICV. This also changes the approach in `getReplacementValue()` to a worklist-based approach so we can explore all relevant BBs. Differential Revision: https://reviews.llvm.org/D85544 --- llvm/lib/Transforms/IPO/Attributor.cpp | 8 +- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 486 ++++++++++++++----- llvm/test/Transforms/OpenMP/icv_tracking.ll | 511 ++++++++++++++++++++ 3 files changed, 889 insertions(+), 116 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 6cd3e059c3a1..160d88858269 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -901,13 +901,15 @@ bool Attributor::checkForAllInstructions(function_ref Pred, // TODO: use the function scope once we have call site AAReturnedValues. const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); - const auto &LivenessAA = - getAAFor(QueryingAA, QueryIRP, /* TrackDependence */ false); + const auto *LivenessAA = + CheckBBLivenessOnly ? 
nullptr + : &(getAAFor(QueryingAA, QueryIRP, + /* TrackDependence */ false)); auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction); if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA, - &LivenessAA, Opcodes, CheckBBLivenessOnly)) + LivenessAA, Opcodes, CheckBBLivenessOnly)) return false; return true; diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index ae7bafd7d91e..5121574c5e6c 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -89,43 +89,6 @@ static void foreachUse(Function &F, CBTy CB, } } -/// Helper struct to store tracked ICV values at specif instructions. -struct ICVValue { - Instruction *Inst; - Value *TrackedValue; - - ICVValue(Instruction *I, Value *Val) : Inst(I), TrackedValue(Val) {} -}; - -namespace llvm { - -// Provide DenseMapInfo for ICVValue -template <> struct DenseMapInfo { - using InstInfo = DenseMapInfo; - using ValueInfo = DenseMapInfo; - - static inline ICVValue getEmptyKey() { - return ICVValue(InstInfo::getEmptyKey(), ValueInfo::getEmptyKey()); - }; - - static inline ICVValue getTombstoneKey() { - return ICVValue(InstInfo::getTombstoneKey(), ValueInfo::getTombstoneKey()); - }; - - static unsigned getHashValue(const ICVValue &ICVVal) { - return detail::combineHashValue( - InstInfo::getHashValue(ICVVal.Inst), - ValueInfo::getHashValue(ICVVal.TrackedValue)); - } - - static bool isEqual(const ICVValue &LHS, const ICVValue &RHS) { - return InstInfo::isEqual(LHS.Inst, RHS.Inst) && - ValueInfo::isEqual(LHS.TrackedValue, RHS.TrackedValue); - } -}; - -} // end namespace llvm - namespace { struct AAICVTracker; @@ -1017,11 +980,28 @@ private: /// Populate the Attributor with abstract attribute opportunities in the /// function. 
void registerAAs() { - for (Function *F : SCC) { - if (F->isDeclaration()) - continue; + if (SCC.empty()) + return; - A.getOrCreateAAFor(IRPosition::function(*F)); + // Create CallSite AA for all Getters. + for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) { + auto ICVInfo = OMPInfoCache.ICVs[static_cast(Idx)]; + + auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; + + auto CreateAA = [&](Use &U, Function &Caller) { + CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI); + if (!CI) + return false; + + auto &CB = cast(*CI); + + IRPosition CBPos = IRPosition::callsite_function(CB); + A.getOrCreateAAFor(CBPos); + return false; + }; + + GetterRFI.foreachUse(SCC, CreateAA); } } }; @@ -1223,6 +1203,12 @@ struct AAICVTracker : public StateWrapper { using Base = StateWrapper; AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {} + void initialize(Attributor &A) override { + Function *F = getAnchorScope(); + if (!F || !A.isFunctionIPOAmendable(*F)) + indicatePessimisticFixpoint(); + } + /// Returns true if value is assumed to be tracked. bool isAssumedTracked() const { return getAssumed(); } @@ -1233,8 +1219,21 @@ struct AAICVTracker : public StateWrapper { static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A); /// Return the value with which \p I can be replaced for specific \p ICV. - virtual Value *getReplacementValue(InternalControlVar ICV, - const Instruction *I, Attributor &A) = 0; + virtual Optional getReplacementValue(InternalControlVar ICV, + const Instruction *I, + Attributor &A) const { + return None; + } + + /// Return an assumed unique ICV value if a single candidate is found. If + /// there cannot be one, return a nullptr. If it is not clear yet, return the + /// Optional::NoneType. + virtual Optional + getUniqueReplacementValue(InternalControlVar ICV) const = 0; + + // Currently only nthreads is being tracked. + // this array will only grow with time. 
+ InternalControlVar TrackableICVs[1] = {ICV_nthreads}; /// See AbstractAttribute::getName() const std::string getName() const override { return "AAICVTracker"; } @@ -1255,57 +1254,20 @@ struct AAICVTrackerFunction : public AAICVTracker { : AAICVTracker(IRP, A) {} // FIXME: come up with better string. - const std::string getAsStr() const override { return "ICVTracker"; } + const std::string getAsStr() const override { return "ICVTrackerFunction"; } // FIXME: come up with some stats. void trackStatistics() const override {} - /// TODO: decide whether to deduplicate here, or use current - /// deduplicateRuntimeCalls function. + /// We don't manifest anything for this AA. ChangeStatus manifest(Attributor &A) override { - ChangeStatus Changed = ChangeStatus::UNCHANGED; - - for (InternalControlVar &ICV : TrackableICVs) - if (deduplicateICVGetters(ICV, A)) - Changed = ChangeStatus::CHANGED; - - return Changed; - } - - bool deduplicateICVGetters(InternalControlVar &ICV, Attributor &A) { - auto &OMPInfoCache = static_cast(A.getInfoCache()); - auto &ICVInfo = OMPInfoCache.ICVs[ICV]; - auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; - - bool Changed = false; - - auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { - CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI); - Instruction *UserI = cast(U.getUser()); - Value *ReplVal = getReplacementValue(ICV, UserI, A); - - if (!ReplVal || !CI) - return false; - - A.removeCallSite(CI); - CI->replaceAllUsesWith(ReplVal); - CI->eraseFromParent(); - Changed = true; - return true; - }; - - GetterRFI.foreachUse(ReplaceAndDeleteCB, getAnchorScope()); - return Changed; + return ChangeStatus::UNCHANGED; } // Map of ICV to their values at specific program point. - EnumeratedArray, InternalControlVar, + EnumeratedArray, InternalControlVar, InternalControlVar::ICV___last> - ICVValuesMap; - - // Currently only nthreads is being tracked. - // this array will only grow with time. 
- InternalControlVar TrackableICVs[1] = {ICV_nthreads}; + ICVReplacementValuesMap; ChangeStatus updateImpl(Attributor &A) override { ChangeStatus HasChanged = ChangeStatus::UNCHANGED; @@ -1317,6 +1279,7 @@ struct AAICVTrackerFunction : public AAICVTracker { for (InternalControlVar ICV : TrackableICVs) { auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; + auto &ValuesMap = ICVReplacementValuesMap[ICV]; auto TrackValues = [&](Use &U, Function &) { CallInst *CI = OpenMPOpt::getCallIfRegularCall(U); if (!CI) @@ -1324,51 +1287,338 @@ struct AAICVTrackerFunction : public AAICVTracker { // FIXME: handle setters with more that 1 arguments. /// Track new value. - if (ICVValuesMap[ICV].insert(ICVValue(CI, CI->getArgOperand(0)))) + if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second) HasChanged = ChangeStatus::CHANGED; return false; }; + auto CallCheck = [&](Instruction &I) { + Optional ReplVal = getValueForCall(A, &I, ICV); + if (ReplVal.hasValue() && + ValuesMap.insert(std::make_pair(&I, *ReplVal)).second) + HasChanged = ChangeStatus::CHANGED; + + return true; + }; + + // Track all changes of an ICV. SetterRFI.foreachUse(TrackValues, F); + + A.checkForAllInstructions(CallCheck, *this, {Instruction::Call}, + /* CheckBBLivenessOnly */ true); + + /// TODO: Figure out a way to avoid adding entry in + /// ICVReplacementValuesMap + Instruction *Entry = &F->getEntryBlock().front(); + if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry)) + ValuesMap.insert(std::make_pair(Entry, nullptr)); } return HasChanged; } - /// Return the value with which \p I can be replaced for specific \p ICV. - Value *getReplacementValue(InternalControlVar ICV, const Instruction *I, - Attributor &A) override { - const BasicBlock *CurrBB = I->getParent(); + /// Helper to check if \p I is a call and get the value for it if it is + /// unique. 
+ Optional getValueForCall(Attributor &A, const Instruction *I, + InternalControlVar &ICV) const { + + const auto *CB = dyn_cast(I); + if (!CB) + return None; - auto &ValuesSet = ICVValuesMap[ICV]; auto &OMPInfoCache = static_cast(A.getInfoCache()); auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter]; + auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; + Function *CalledFunction = CB->getCalledFunction(); - for (const auto &ICVVal : ValuesSet) { - if (CurrBB == ICVVal.Inst->getParent()) { - if (!ICVVal.Inst->comesBefore(I)) - continue; + if (CalledFunction == GetterRFI.Declaration) + return None; + if (CalledFunction == SetterRFI.Declaration) { + if (ICVReplacementValuesMap[ICV].count(I)) + return ICVReplacementValuesMap[ICV].lookup(I); - // both instructions are in the same BB and at \p I we know the ICV - // value. - while (I != ICVVal.Inst) { - // we don't yet know if a call might update an ICV. - // TODO: check callsite AA for value. - if (const auto *CB = dyn_cast(I)) - if (CB->getCalledFunction() != GetterRFI.Declaration) + return nullptr; + } + + // Since we don't know, assume it changes the ICV. + if (CalledFunction->isDeclaration()) + return nullptr; + + const auto &ICVTrackingAA = + A.getAAFor(*this, IRPosition::callsite_returned(*CB)); + + if (ICVTrackingAA.isAssumedTracked()) + return ICVTrackingAA.getUniqueReplacementValue(ICV); + + // If we don't know, assume it changes. + return nullptr; + } + + // We don't check unique value for a function, so return None. + Optional + getUniqueReplacementValue(InternalControlVar ICV) const override { + return None; + } + + /// Return the value with which \p I can be replaced for specific \p ICV. 
+ Optional getReplacementValue(InternalControlVar ICV, + const Instruction *I, + Attributor &A) const override { + const auto &ValuesMap = ICVReplacementValuesMap[ICV]; + if (ValuesMap.count(I)) + return ValuesMap.lookup(I); + + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(I); + + Optional ReplVal; + + while (!Worklist.empty()) { + const Instruction *CurrInst = Worklist.pop_back_val(); + if (!Visited.insert(CurrInst).second) + continue; + + const BasicBlock *CurrBB = CurrInst->getParent(); + + // Go up and look for all potential setters/calls that might change the + // ICV. + while ((CurrInst = CurrInst->getPrevNode())) { + if (ValuesMap.count(CurrInst)) { + Optional NewReplVal = ValuesMap.lookup(CurrInst); + // Unknown value, track new. + if (!ReplVal.hasValue()) { + ReplVal = NewReplVal; + break; + } + + // If we found a new value, we can't know the icv value anymore. + if (NewReplVal.hasValue()) + if (ReplVal != NewReplVal) return nullptr; - I = I->getPrevNode(); + break; } - // No call in between, return the value. - return ICVVal.TrackedValue; + Optional NewReplVal = getValueForCall(A, CurrInst, ICV); + if (!NewReplVal.hasValue()) + continue; + + // Unknown value, track new. + if (!ReplVal.hasValue()) { + ReplVal = NewReplVal; + break; + } + + // if (NewReplVal.hasValue()) + // We found a new value, we can't know the icv value anymore. + if (ReplVal != NewReplVal) + return nullptr; + } + + // If we are in the same BB and we have a value, we are done. + if (CurrBB == I->getParent() && ReplVal.hasValue()) + return ReplVal; + + // Go through all predecessors and add terminators for analysis. 
+ for (const BasicBlock *Pred : predecessors(CurrBB)) + if (const Instruction *Terminator = Pred->getTerminator()) + Worklist.push_back(Terminator); + } + + return ReplVal; + } +}; + +struct AAICVTrackerFunctionReturned : AAICVTracker { + AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A) + : AAICVTracker(IRP, A) {} + + // FIXME: come up with better string. + const std::string getAsStr() const override { + return "ICVTrackerFunctionReturned"; + } + + // FIXME: come up with some stats. + void trackStatistics() const override {} + + /// We don't manifest anything for this AA. + ChangeStatus manifest(Attributor &A) override { + return ChangeStatus::UNCHANGED; + } + + // Map of ICV to their values at specific program point. + EnumeratedArray, InternalControlVar, + InternalControlVar::ICV___last> + ICVReplacementValuesMap; + + /// Return the value with which \p I can be replaced for specific \p ICV. + Optional + getUniqueReplacementValue(InternalControlVar ICV) const override { + return ICVReplacementValuesMap[ICV]; + } + + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + const auto &ICVTrackingAA = A.getAAFor( + *this, IRPosition::function(*getAnchorScope())); + + if (!ICVTrackingAA.isAssumedTracked()) + return indicatePessimisticFixpoint(); + + for (InternalControlVar ICV : TrackableICVs) { + Optional &ReplVal = ICVReplacementValuesMap[ICV]; + Optional UniqueICVValue; + + auto CheckReturnInst = [&](Instruction &I) { + Optional NewReplVal = + ICVTrackingAA.getReplacementValue(ICV, &I, A); + + // If we found a second ICV value there is no unique returned value. 
+ if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal) + return false; + + UniqueICVValue = NewReplVal; + + return true; + }; + + if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}, + /* CheckBBLivenessOnly */ true)) + UniqueICVValue = nullptr; + + if (UniqueICVValue == ReplVal) + continue; + + ReplVal = UniqueICVValue; + Changed = ChangeStatus::CHANGED; + } + + return Changed; + } +}; + +struct AAICVTrackerCallSite : AAICVTracker { + AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A) + : AAICVTracker(IRP, A) {} + + void initialize(Attributor &A) override { + Function *F = getAnchorScope(); + if (!F || !A.isFunctionIPOAmendable(*F)) + indicatePessimisticFixpoint(); + + // We only initialize this AA for getters, so we need to know which ICV it + // gets. + auto &OMPInfoCache = static_cast(A.getInfoCache()); + for (InternalControlVar ICV : TrackableICVs) { + auto ICVInfo = OMPInfoCache.ICVs[ICV]; + auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter]; + if (Getter.Declaration == getAssociatedFunction()) { + AssociatedICV = ICVInfo.Kind; + return; } } - // No value was tracked. - return nullptr; + /// Unknown ICV. + indicatePessimisticFixpoint(); + } + + ChangeStatus manifest(Attributor &A) override { + if (!ReplVal.hasValue() || !ReplVal.getValue()) + return ChangeStatus::UNCHANGED; + + A.changeValueAfterManifest(*getCtxI(), **ReplVal); + A.deleteAfterManifest(*getCtxI()); + + return ChangeStatus::CHANGED; + } + + // FIXME: come up with better string. + const std::string getAsStr() const override { return "ICVTrackerCallSite"; } + + // FIXME: come up with some stats. + void trackStatistics() const override {} + + InternalControlVar AssociatedICV; + Optional ReplVal; + + ChangeStatus updateImpl(Attributor &A) override { + const auto &ICVTrackingAA = A.getAAFor( + *this, IRPosition::function(*getAnchorScope())); + + // We don't have any information, so we assume it changes the ICV. 
+ if (!ICVTrackingAA.isAssumedTracked()) + return indicatePessimisticFixpoint(); + + Optional NewReplVal = + ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A); + + if (ReplVal == NewReplVal) + return ChangeStatus::UNCHANGED; + + ReplVal = NewReplVal; + return ChangeStatus::CHANGED; + } + + // Return the value with which associated value can be replaced for specific + // \p ICV. + Optional + getUniqueReplacementValue(InternalControlVar ICV) const override { + return ReplVal; + } +}; + +struct AAICVTrackerCallSiteReturned : AAICVTracker { + AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AAICVTracker(IRP, A) {} + + // FIXME: come up with better string. + const std::string getAsStr() const override { + return "ICVTrackerCallSiteReturned"; + } + + // FIXME: come up with some stats. + void trackStatistics() const override {} + + /// We don't manifest anything for this AA. + ChangeStatus manifest(Attributor &A) override { + return ChangeStatus::UNCHANGED; + } + + // Map of ICV to their values at specific program point. + EnumeratedArray, InternalControlVar, + InternalControlVar::ICV___last> + ICVReplacementValuesMap; + + /// Return the value with which associated value can be replaced for specific + /// \p ICV. + Optional + getUniqueReplacementValue(InternalControlVar ICV) const override { + return ICVReplacementValuesMap[ICV]; + } + + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + const auto &ICVTrackingAA = A.getAAFor( + *this, IRPosition::returned(*getAssociatedFunction())); + + // We don't have any information, so we assume it changes the ICV. 
+ if (!ICVTrackingAA.isAssumedTracked()) + return indicatePessimisticFixpoint(); + + for (InternalControlVar ICV : TrackableICVs) { + Optional &ReplVal = ICVReplacementValuesMap[ICV]; + Optional NewReplVal = + ICVTrackingAA.getUniqueReplacementValue(ICV); + + if (ReplVal == NewReplVal) + continue; + + ReplVal = NewReplVal; + Changed = ChangeStatus::CHANGED; + } + return Changed; } }; } // namespace @@ -1382,14 +1632,20 @@ AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: case IRPosition::IRP_ARGUMENT: - case IRPosition::IRP_RETURNED: - case IRPosition::IRP_CALL_SITE_RETURNED: case IRPosition::IRP_CALL_SITE_ARGUMENT: - case IRPosition::IRP_CALL_SITE: - llvm_unreachable("ICVTracker can only be created for function position!"); + llvm_unreachable("ICVTracker: invalid IRPosition!"); case IRPosition::IRP_FUNCTION: AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); break; + case IRPosition::IRP_RETURNED: + AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A); + break; + case IRPosition::IRP_CALL_SITE_RETURNED: + AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A); + break; + case IRPosition::IRP_CALL_SITE: + AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A); + break; } return *AA; @@ -1439,7 +1695,9 @@ PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, /*CGSCC*/ Functions, OMPInModule.getKernels()); - Attributor A(Functions, InfoCache, CGUpdater); + SetVector ModuleSlice(InfoCache.ModuleSlice.begin(), + InfoCache.ModuleSlice.end()); + Attributor A(ModuleSlice, InfoCache, CGUpdater); // TODO: Compute the module slice we are allowed to look at. 
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); @@ -1516,7 +1774,9 @@ struct OpenMPOptLegacyPass : public CallGraphSCCPass { *(Functions.back()->getParent()), AG, Allocator, /*CGSCC*/ Functions, OMPInModule.getKernels()); - Attributor A(Functions, InfoCache, CGUpdater); + SetVector ModuleSlice(InfoCache.ModuleSlice.begin(), + InfoCache.ModuleSlice.end()); + Attributor A(ModuleSlice, InfoCache, CGUpdater); // TODO: Compute the module slice we are allowed to look at. OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); diff --git a/llvm/test/Transforms/OpenMP/icv_tracking.ll b/llvm/test/Transforms/OpenMP/icv_tracking.ll index c2b5d40ce97a..19b55cc661b0 100644 --- a/llvm/test/Transforms/OpenMP/icv_tracking.ll +++ b/llvm/test/Transforms/OpenMP/icv_tracking.ll @@ -7,6 +7,29 @@ @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +; doesn't modify any ICVs. 
+define i32 @icv_free_use(i32 %0) { +; CHECK-LABEL: define {{[^@]+}}@icv_free_use +; CHECK-SAME: (i32 [[TMP0:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %2 = add nsw i32 %0, 1 + ret i32 %2 +} + +define i32 @bad_use(i32 %0) { +; CHECK-LABEL: define {{[^@]+}}@bad_use +; CHECK-SAME: (i32 [[TMP0:%.*]]) +; CHECK-NEXT: tail call void @use(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + tail call void @use(i32 %0) + %2 = add nsw i32 %0, 1 + ret i32 %2 +} + define dso_local i32 @foo(i32 %0, i32 %1) { ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) @@ -105,5 +128,493 @@ define internal void @.omp_outlined..1(i32* %0, i32* %1) { ret void } +define dso_local i32 @bar1(i32 %0, i32 %1) { +; CHECK-LABEL: define {{[^@]+}}@bar1 +; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP0]], i32 [[TMP1]] +; CHECK-NEXT: tail call void @omp_set_num_threads(i32 [[TMP4]]) +; CHECK-NEXT: tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @omp_get_max_threads() +; CHECK-NEXT: tail call void @use(i32 [[TMP5]]) +; CHECK-NEXT: ret i32 0 +; + %3 = icmp sgt i32 %0, %1 + %4 = select i1 %3, i32 %0, i32 %1 + tail call void @omp_set_num_threads(i32 %4) + %5 = tail call i32 @omp_get_max_threads() + tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) + %6 = tail call i32 @omp_get_max_threads() + tail call void @use(i32 %6) + ret i32 0 +} + +define internal void @.omp_outlined..2(i32* %0, i32* %1) { +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 +; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @omp_get_max_threads() +; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @icv_free_use(i32 [[TMP3]]) +; CHECK-NEXT: tail call void @omp_set_num_threads(i32 10) +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @icv_free_use(i32 10) +; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @icv_free_use(i32 10) +; CHECK-NEXT: ret void +; + %3 = tail call i32 @omp_get_max_threads() + %4 = tail call i32 @icv_free_use(i32 %3) + tail call void @omp_set_num_threads(i32 10) + %5 = tail call i32 @omp_get_max_threads() + %6 = tail call i32 @icv_free_use(i32 %5) + %7 = tail call i32 @omp_get_max_threads() + %8 = tail call i32 @icv_free_use(i32 %7) + ret void +} +define void @test(i1 %0) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i1 [[TMP0:%.*]]) +; CHECK-NEXT: call void @omp_set_num_threads(i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i1 [[TMP0]], false +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP4:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: call void @use(i32 10) +; CHECK-NEXT: br label [[TMP4]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @omp_get_max_threads() +; CHECK-NEXT: call void @use(i32 [[TMP5]]) +; CHECK-NEXT: ret void +; + call void @omp_set_num_threads(i32 2) + %2 = icmp eq i1 %0, 0 + br i1 %2, label %4, label %3 + +3: ; preds = %1 + call void @use(i32 10) + br label %4 + +4: ; preds = %3, %1 + %5 = call i32 @omp_get_max_threads() + call void @use(i32 %5) + ret void +} + +define void @test1(i1 %0) { +; CHECK-LABEL: define {{[^@]+}}@test1 +; CHECK-SAME: (i1 [[TMP0:%.*]]) +; CHECK-NEXT: call void 
@omp_set_num_threads(i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i1 [[TMP0]], false +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @icv_free_use(i32 10) +; CHECK-NEXT: br label [[TMP5]] +; CHECK: 5: +; CHECK-NEXT: call void @use(i32 2) +; CHECK-NEXT: ret void +; + call void @omp_set_num_threads(i32 2) + %2 = icmp eq i1 %0, 0 + br i1 %2, label %5, label %3 + +3: ; preds = %1 + %4 = call i32 @icv_free_use(i32 10) + br label %5 + +5: ; preds = %3, %1 + %6 = call i32 @omp_get_max_threads() + call void @use(i32 %6) + ret void +} + +define void @bad_use_test(i1 %0) { +; CHECK-LABEL: define {{[^@]+}}@bad_use_test +; CHECK-SAME: (i1 [[TMP0:%.*]]) +; CHECK-NEXT: call void @omp_set_num_threads(i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i1 [[TMP0]], false +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @bad_use(i32 10) +; CHECK-NEXT: br label [[TMP5]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @omp_get_max_threads() +; CHECK-NEXT: call void @use(i32 [[TMP6]]) +; CHECK-NEXT: ret void +; + call void @omp_set_num_threads(i32 2) + %2 = icmp eq i1 %0, 0 + br i1 %2, label %5, label %3 + +3: ; preds = %1 + %4 = call i32 @bad_use(i32 10) + br label %5 + +5: ; preds = %3, %1 + %6 = call i32 @omp_get_max_threads() + call void @use(i32 %6) + ret void +} + +define weak void @weak_known_unique_icv(i1 %0) { +; CHECK-LABEL: define {{[^@]+}}@weak_known_unique_icv +; CHECK-SAME: (i1 [[TMP0:%.*]]) +; CHECK-NEXT: call void @omp_set_num_threads(i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i1 [[TMP0]], false +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @icv_free_use(i32 10) +; CHECK-NEXT: br label [[TMP5]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @omp_get_max_threads() +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @icv_free_use(i32 [[TMP6]]) +; CHECK-NEXT: 
ret void +; + call void @omp_set_num_threads(i32 2) + %2 = icmp eq i1 %0, 0 + br i1 %2, label %5, label %3 + +3: ; preds = %1 + %4 = call i32 @icv_free_use(i32 10) + br label %5 + +5: ; preds = %3, %1 + %6 = call i32 @omp_get_max_threads() + %7 = call i32 @icv_free_use(i32 %6) + ret void +} + +define void @known_unique_icv(i1 %0) { +; CHECK-LABEL: define {{[^@]+}}@known_unique_icv +; CHECK-SAME: (i1 [[TMP0:%.*]]) +; CHECK-NEXT: call void @omp_set_num_threads(i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i1 [[TMP0]], false +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @icv_free_use(i32 10) +; CHECK-NEXT: br label [[TMP5]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @icv_free_use(i32 2) +; CHECK-NEXT: ret void +; + call void @omp_set_num_threads(i32 2) + %2 = icmp eq i1 %0, 0 + br i1 %2, label %5, label %3 + +3: ; preds = %1 + %4 = call i32 @icv_free_use(i32 10) + br label %5 + +5: ; preds = %3, %1 + %6 = call i32 @omp_get_max_threads() + %7 = call i32 @icv_free_use(i32 %6) + ret void +} + +define i32 @no_unique_icv(i1 %0) { +; CHECK-LABEL: define {{[^@]+}}@no_unique_icv +; CHECK-SAME: (i1 [[TMP0:%.*]]) +; CHECK-NEXT: call void @omp_set_num_threads(i32 4) +; CHECK-NEXT: br i1 [[TMP0]], label [[TMP3:%.*]], label [[TMP2:%.*]] +; CHECK: 2: +; CHECK-NEXT: call void @omp_set_num_threads(i32 2) +; CHECK-NEXT: br label [[TMP3]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @omp_get_max_threads() +; CHECK-NEXT: ret i32 [[TMP4]] +; + call void @omp_set_num_threads(i32 4) + br i1 %0, label %3, label %2 + +2: ; preds = %1 + call void @omp_set_num_threads(i32 2) + br label %3 + +3: ; preds = %1, %2 + %4 = call i32 @omp_get_max_threads() + ret i32 %4 +} + +define void @test2(i1 %0) { +; CHECK-LABEL: define {{[^@]+}}@test2 +; CHECK-SAME: (i1 [[TMP0:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i1 [[TMP0]], false +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP4:%.*]], label [[TMP3:%.*]] +; CHECK: 3: 
+; CHECK-NEXT: call void @omp_set_num_threads(i32 4) +; CHECK-NEXT: br label [[TMP4]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @omp_get_max_threads() +; CHECK-NEXT: call void @use(i32 [[TMP5]]) +; CHECK-NEXT: ret void +; + %2 = icmp eq i1 %0, 0 + br i1 %2, label %4, label %3 + +3: ; preds = %1 + call void @omp_set_num_threads(i32 4) + br label %4 + +4: ; preds = %3, %1 + %5 = call i32 @omp_get_max_threads() + call void @use(i32 %5) + ret void +} + +define void @test3(i1 %0) { +; CHECK-LABEL: define {{[^@]+}}@test3 +; CHECK-SAME: (i1 [[TMP0:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i1 [[TMP0]], false +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP4:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: call void @omp_set_num_threads(i32 4) +; CHECK-NEXT: br label [[TMP4]] +; CHECK: 4: +; CHECK-NEXT: call void @weak_known_unique_icv(i1 [[TMP0]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @omp_get_max_threads() +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @icv_free_use(i32 [[TMP5]]) +; CHECK-NEXT: ret void +; + %2 = icmp eq i1 %0, 0 + br i1 %2, label %4, label %3 + +3: ; preds = %1 + call void @omp_set_num_threads(i32 4) + br label %4 + +4: ; preds = %3, %1 + call void @weak_known_unique_icv(i1 %0) + %5 = call i32 @omp_get_max_threads() + %6 = call i32 @icv_free_use(i32 %5) + ret void +} + +declare void @__cxa_rethrow() + +define i32 @maybe_throw(i1 zeroext %0) { +; CHECK-LABEL: define {{[^@]+}}@maybe_throw +; CHECK-SAME: (i1 zeroext [[TMP0:%.*]]) +; CHECK-NEXT: call void @omp_set_num_threads(i32 4) +; CHECK-NEXT: br i1 [[TMP0]], label [[TMP2:%.*]], label [[TMP3:%.*]] +; CHECK: 2: +; CHECK-NEXT: tail call void @__cxa_rethrow() +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: ret i32 -1 +; + call void @omp_set_num_threads(i32 4) + br i1 %0, label %2, label %3 + +2: ; preds = %1 + tail call void @__cxa_rethrow() #1 + unreachable + +3: ; preds = %1 + ret i32 -1 +} + +define void @test4(i1 %0) { +; CHECK-LABEL: define {{[^@]+}}@test4 +; CHECK-SAME: (i1 [[TMP0:%.*]]) 
+; CHECK-NEXT: call void @known_unique_icv(i1 [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i1 [[TMP0]], false +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP4:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[VAL:%.*]] = call i32 @icv_free_use(i32 10) +; CHECK-NEXT: br label [[TMP4]] +; CHECK: 4: +; CHECK-NEXT: call void @use(i32 2) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @omp_get_max_threads() +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @no_unique_icv(i1 [[TMP0]]) +; CHECK-NEXT: call void @use(i32 [[TMP5]]) +; CHECK-NEXT: ret void +; + call void @known_unique_icv(i1 %0) + %2 = icmp eq i1 %0, 0 + br i1 %2, label %4, label %3 + +3: ; preds = %1 + %val = call i32 @icv_free_use(i32 10) + br label %4 + +4: ; preds = %3, %1 + %5 = call i32 @omp_get_max_threads() + call void @use(i32 %5) + %6 = call i32 @omp_get_max_threads() + call i32 @no_unique_icv(i1 %0) + call void @use(i32 %6) + ret void +} + +define void @test4_invoke(i1 %0) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: define {{[^@]+}}@test4_invoke +; CHECK-SAME: (i1 [[TMP0:%.*]]) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) +; CHECK-NEXT: call void @known_unique_icv(i1 [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = invoke i32 @maybe_throw(i1 [[TMP0]]) +; CHECK-NEXT: to label [[CONT:%.*]] unwind label [[EXC:%.*]] +; CHECK: cont: +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i1 [[TMP0]], false +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] +; CHECK: exc: +; CHECK-NEXT: [[LP:%.*]] = landingpad
{ i8*, i32 } + filter [0 x i8*] zeroinitializer + unreachable + +4: ; preds = %cont + %val = call i32 @icv_free_use(i32 10) + br label %5 + +5: ; preds = %4, %cont + %6 = call i32 @omp_get_max_threads() ; expected to be replaced by the constant 2 (see @use(i32 2) in the expectations above) + call void @use(i32 %6) + ret void +} + +define i32 @test5(i32 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@test5 +; CHECK-SAME: (i32 [[TMP0:%.*]]) +; CHECK-NEXT: call void @omp_set_num_threads(i32 4) +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; CHECK: 3: +; CHECK-NEXT: call void @use(i32 4) +; CHECK-NEXT: br label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP8:%.*]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @icv_free_use(i32 [[TMP0]]) +; CHECK-NEXT: br label [[TMP15:%.*]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12]] +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @icv_free_use(i32 10) +; CHECK-NEXT: br label [[TMP15]] +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @icv_free_use(i32 [[TMP13]]) +; CHECK-NEXT: br label [[TMP15]] +; CHECK: 15: +; CHECK-NEXT: [[TMP16:%.*]] = call i32 @omp_get_max_threads() +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @icv_free_use(i32 [[TMP16]]) +; CHECK-NEXT: ret i32 [[TMP17]] +; + call void @omp_set_num_threads(i32 4) + %2 = icmp sgt i32 %0, 3 + br i1 %2, label %3, label %5 + +3: + %4 = call i32 @omp_get_max_threads() ; expected to be replaced by 4, the argument of omp_set_num_threads above + call void @use(i32 %4) + br label %13 + +5: + %6 = icmp sgt i32 %0, 0 + br i1 %6, label %7, label %9 + +7: + %8 = call i32 @icv_free_use(i32 %0) + br label %16 + +9: + %10 = icmp eq i32 %0, 0 + br i1 %10, label %11, label %13 + +11: + %12 = call i32 @icv_free_use(i32 10) + br label %16 + +13: + %14 = add nsw i32 %0, 1 + %15 = call i32 @icv_free_use(i32 %14) + br label %16 + +16: ; NOTE(review): the getter below is expected to remain; presumably the @use call on the %3 path may change the ICV - confirm + %17 =
call i32 @omp_get_max_threads() + %18 = call i32 @icv_free_use(i32 %17) + ret i32 %18 +} + +define i32 @test6(i32 %0) { +; CHECK-LABEL: define {{[^@]+}}@test6 +; CHECK-SAME: (i32 [[TMP0:%.*]]) +; CHECK-NEXT: call void @omp_set_num_threads(i32 4) +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @icv_free_use(i32 10) +; CHECK-NEXT: br label [[TMP16:%.*]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[TMP7:%.*]], label [[TMP9:%.*]] +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @icv_free_use(i32 [[TMP0]]) +; CHECK-NEXT: br label [[TMP16]] +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP13:%.*]] +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @icv_free_use(i32 5) +; CHECK-NEXT: br label [[TMP16]] +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @icv_free_use(i32 [[TMP14]]) +; CHECK-NEXT: br label [[TMP16]] +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @icv_free_use(i32 4) +; CHECK-NEXT: ret i32 [[TMP17]] +; + call void @omp_set_num_threads(i32 4) + %2 = icmp sgt i32 %0, 3 + br i1 %2, label %3, label %5 + +3: ; preds = %1 + %4 = call i32 @icv_free_use(i32 10) + br label %16 + +5: ; preds = %1 + %6 = icmp sgt i32 %0, 0 + br i1 %6, label %7, label %9 + +7: ; preds = %5 + %8 = call i32 @icv_free_use(i32 %0) + br label %16 + +9: ; preds = %5 + %10 = icmp eq i32 %0, 0 + br i1 %10, label %11, label %13 + +11: ; preds = %9 + %12 = call i32 @icv_free_use(i32 5) + br label %16 + +13: ; preds = %9 + %14 = add nsw i32 %0, 1 + %15 = call i32 @icv_free_use(i32 %14) + br label %16 + +16: ; preds = %7, %13, %11, %3 + %17 = call i32 @omp_get_max_threads() ; expected to be replaced by 4 (see @icv_free_use(i32 4) in the expectations above) + %18 = call i32 @icv_free_use(i32 %17) + ret i32 %18 +} + +declare i32
@__gxx_personality_v0(...) + !0 = !{!1} !1 = !{i64 2, i64 -1, i64 -1, i1 true}