[Attributor] Introduce AAInstanceInfo

The Attributor, as many other parts in LLVM, uses pointer equivalence
for `llvm::Value`s. This only works as long as `llvm::Value`s are
dynamically unique, or, to be exact, we will never end up with the same
`llvm::Value` representing two dynamic instances. We already provided a
helper to check the former, namely `AA::isDynamicallyUnique`, however we
could not check the latter. In this patch we move the logic into a
separate AA which helps with the growing complexity and use cases. We
also extend the interface to answer the second question rather than the
first. So we do not determine dynamic uniqueness but rather whether we might
end up with the `llvm::Value` describing a different dynamic instance. Note
that the latter is very much tied to the Attributor capabilities to look
through memory, recursion, etc. so we need to update the logic as we go.
This commit is contained in:
Johannes Doerfert 2022-03-16 15:53:32 -05:00
parent c42aa1be74
commit af30de7788
11 changed files with 415 additions and 172 deletions

View File

@ -154,8 +154,11 @@ bool isNoSyncInst(Attributor &A, const Instruction &I,
/// Return true if \p V is dynamically unique, that is, there are no two
/// "instances" of \p V at runtime with different values.
/// Note: If \p ForAnalysisOnly is set we only check that the Attributor will
/// never use \p V to represent two "instances" not that \p V could not
/// technically represent them.
bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
const Value &V);
const Value &V, bool ForAnalysisOnly = true);
/// Return true if \p V is a valid value in \p Scope, that is a constant or an
/// instruction/argument of \p Scope.
@ -3705,6 +3708,46 @@ struct AAAlign : public IRAttribute<
static const char ID;
};
/// Abstract interface tracking whether a value leaves its defining function
/// instance.
/// TODO: Turn this into a ternary AA that tracks uniqueness and uniqueness
///       wrt. the Attributor analysis separately.
struct AAInstanceInfo : public StateWrapper<BooleanState, AbstractAttribute> {
  AAInstanceInfo(const IRPosition &IRP, Attributor &A)
      : StateWrapper<BooleanState, AbstractAttribute>(IRP) {}

  /// Return true if the underlying value is known to be unique in its scope
  /// wrt. the Attributor analysis. The value might not be unique as such, but
  /// pointer equality can still be used without the risk of one `llvm::Value`
  /// standing in for two instances.
  bool isKnownUniqueForAnalysis() const { return isKnown(); }

  /// Return true if the underlying value is assumed to be unique in its scope
  /// wrt. the Attributor analysis. The value might not be unique as such, but
  /// pointer equality can still be used without the risk of one `llvm::Value`
  /// standing in for two instances.
  bool isAssumedUniqueForAnalysis() const { return isAssumed(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AAInstanceInfo &createForPosition(const IRPosition &IRP,
                                           Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAInstanceInfo"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// Return true if \p AA is an AAInstanceInfo, decided by comparing the
  /// address returned by getIdAddr() with the address of ID.
  static bool classof(const AbstractAttribute *AA) {
    return AA->getIdAddr() == &ID;
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
/// An abstract interface for all nocapture attributes.
struct AANoCapture
: public IRAttribute<

View File

@ -209,23 +209,13 @@ bool AA::isNoSyncInst(Attributor &A, const Instruction &I,
}
bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
const Value &V) {
if (auto *C = dyn_cast<Constant>(&V))
return !C->isThreadDependent();
// TODO: Inspect and cache more complex instructions.
if (auto *CB = dyn_cast<CallBase>(&V))
return CB->arg_size() == 0 && !CB->mayHaveSideEffects() &&
!CB->mayReadFromMemory();
const Function *Scope = nullptr;
if (auto *I = dyn_cast<Instruction>(&V))
Scope = I->getFunction();
if (auto *A = dyn_cast<Argument>(&V))
Scope = A->getParent();
if (!Scope)
const Value &V, bool ForAnalysisOnly) {
// TODO: See the AAInstanceInfo class comment.
if (!ForAnalysisOnly)
return false;
auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
QueryingAA, IRPosition::function(*Scope), DepClassTy::OPTIONAL);
return NoRecurseAA.isAssumedNoRecurse();
auto &InstanceInfoAA = A.getAAFor<AAInstanceInfo>(
QueryingAA, IRPosition::value(V), DepClassTy::OPTIONAL);
return InstanceInfoAA.isAssumedUniqueForAnalysis();
}
Constant *AA::getInitialValueForObj(Value &Obj, Type &Ty,

View File

@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/ADT/APInt.h"
@ -147,6 +148,7 @@ PIPE_OPERATOR(AANonNull)
PIPE_OPERATOR(AANoAlias)
PIPE_OPERATOR(AADereferenceable)
PIPE_OPERATOR(AAAlign)
PIPE_OPERATOR(AAInstanceInfo)
PIPE_OPERATOR(AANoCapture)
PIPE_OPERATOR(AAValueSimplify)
PIPE_OPERATOR(AANoFree)
@ -4812,6 +4814,169 @@ struct AANoReturnCallSite final : AANoReturnImpl {
};
} // namespace
/// ----------------------- Instance Info ---------------------------------
namespace {
/// A class to hold the state of for no-capture attributes.
struct AAInstanceInfoImpl : public AAInstanceInfo {
AAInstanceInfoImpl(const IRPosition &IRP, Attributor &A)
: AAInstanceInfo(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
Value &V = getAssociatedValue();
if (auto *C = dyn_cast<Constant>(&V)) {
if (C->isThreadDependent())
indicatePessimisticFixpoint();
else
indicateOptimisticFixpoint();
return;
}
if (auto *CB = dyn_cast<CallBase>(&V))
if (CB->arg_size() == 0 && !CB->mayHaveSideEffects() &&
!CB->mayReadFromMemory()) {
indicateOptimisticFixpoint();
return;
}
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
Value &V = getAssociatedValue();
const Function *Scope = nullptr;
if (auto *I = dyn_cast<Instruction>(&V))
Scope = I->getFunction();
if (auto *A = dyn_cast<Argument>(&V)) {
Scope = A->getParent();
if (!Scope->hasLocalLinkage())
return Changed;
}
if (!Scope)
return indicateOptimisticFixpoint();
auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
*this, IRPosition::function(*Scope), DepClassTy::OPTIONAL);
if (NoRecurseAA.isAssumedNoRecurse())
return Changed;
auto UsePred = [&](const Use &U, bool &Follow) {
const Instruction *UserI = dyn_cast<Instruction>(U.getUser());
if (!UserI || isa<GetElementPtrInst>(UserI) || isa<CastInst>(UserI) ||
isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
Follow = true;
return true;
}
if (isa<LoadInst>(UserI) || isa<CmpInst>(UserI) ||
(isa<StoreInst>(UserI) &&
cast<StoreInst>(UserI)->getValueOperand() != U.get()))
return true;
if (auto *CB = dyn_cast<CallBase>(UserI)) {
// This check is not guaranteeing uniqueness but for now that we cannot
// end up with two versions of \p U thinking it was one.
if (!CB->getCalledFunction() ||
!CB->getCalledFunction()->hasLocalLinkage())
return true;
if (!CB->isArgOperand(&U))
return false;
const auto &ArgInstanceInfoAA = A.getAAFor<AAInstanceInfo>(
*this, IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)),
DepClassTy::OPTIONAL);
if (ArgInstanceInfoAA.isAssumedUniqueForAnalysis())
return true;
}
return false;
};
auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) {
if (auto *SI = dyn_cast<StoreInst>(OldU.getUser())) {
auto *Ptr = SI->getPointerOperand()->stripPointerCasts();
if (isa<AllocaInst>(Ptr) && AA::isDynamicallyUnique(A, *this, *Ptr))
return true;
auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(
*SI->getFunction());
if (isAllocationFn(Ptr, TLI) && AA::isDynamicallyUnique(A, *this, *Ptr))
return true;
}
return false;
};
if (!A.checkForAllUses(UsePred, *this, V, /* CheckBBLivenessOnly */ true,
DepClassTy::OPTIONAL,
/* IgnoreDroppableUses */ true, EquivalentUseCB))
return indicatePessimisticFixpoint();
return Changed;
}
/// See AbstractState::getAsStr().
const std::string getAsStr() const override {
return isAssumedUniqueForAnalysis() ? "<unique [fAa]>" : "<unknown>";
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};
/// InstanceInfo attribute for floating values. Adds nothing on top of
/// AAInstanceInfoImpl besides forwarding the constructor arguments.
struct AAInstanceInfoFloating : AAInstanceInfoImpl {
  AAInstanceInfoFloating(const IRPosition &IRP, Attributor &A)
      : AAInstanceInfoImpl(IRP, A) {}
};
/// InstanceInfo attribute for function arguments. Reuses the floating value
/// implementation unchanged.
struct AAInstanceInfoArgument final : AAInstanceInfoFloating {
  AAInstanceInfoArgument(const IRPosition &IRP, Attributor &A)
      : AAInstanceInfoFloating(IRP, A) {}
};
/// InstanceInfo attribute for call site arguments.
struct AAInstanceInfoCallSiteArgument final : AAInstanceInfoImpl {
  AAInstanceInfoCallSiteArgument(const IRPosition &IRP, Attributor &A)
      : AAInstanceInfoImpl(IRP, A) {}

  /// See AbstractAttribute::updateImpl(...).
  ///
  /// Clamps this state with the state of the associated callee argument. If
  /// there is no associated argument a pessimistic fixpoint is indicated.
  ChangeStatus updateImpl(Attributor &A) override {
    // TODO: Once we have call site specific value information we can provide
    //       call site specific liveness information and then it makes
    //       sense to specialize attributes for call sites arguments instead of
    //       redirecting requests to the callee argument.
    Argument *CalleeArg = getAssociatedArgument();
    if (!CalleeArg)
      return indicatePessimisticFixpoint();

    auto &CalleeArgAA = A.getAAFor<AAInstanceInfo>(
        *this, IRPosition::argument(*CalleeArg), DepClassTy::REQUIRED);
    return clampStateAndIndicateChange(getState(), CalleeArgAA.getState());
  }
};
/// InstanceInfo attribute for function return values. This position is not
/// supported: the constructor, initialize, and updateImpl are all marked
/// unreachable.
struct AAInstanceInfoReturned final : AAInstanceInfoImpl {
  AAInstanceInfoReturned(const IRPosition &IRP, Attributor &A)
      : AAInstanceInfoImpl(IRP, A) {
    llvm_unreachable("InstanceInfo is not applicable to function returns!");
  }

  /// See AbstractAttribute::initialize(...).
  void initialize(Attributor &A) override {
    llvm_unreachable("InstanceInfo is not applicable to function returns!");
  }

  /// See AbstractAttribute::updateImpl(...).
  ChangeStatus updateImpl(Attributor &A) override {
    llvm_unreachable("InstanceInfo is not applicable to function returns!");
  }
};
/// InstanceInfo attribute deduction for a call site return value. Reuses the
/// floating value implementation unchanged.
struct AAInstanceInfoCallSiteReturned final : AAInstanceInfoFloating {
  AAInstanceInfoCallSiteReturned(const IRPosition &IRP, Attributor &A)
      : AAInstanceInfoFloating(IRP, A) {}
};
} // namespace
/// ----------------------- Variable Capturing ---------------------------------
namespace {
@ -10047,6 +10212,7 @@ const char AANoReturn::ID = 0;
const char AAIsDead::ID = 0;
const char AADereferenceable::ID = 0;
const char AAAlign::ID = 0;
const char AAInstanceInfo::ID = 0;
const char AANoCapture::ID = 0;
const char AAValueSimplify::ID = 0;
const char AAHeapToStack::ID = 0;
@ -10169,6 +10335,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInstanceInfo)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)

View File

@ -17,14 +17,22 @@ define i32 @bar(i32 %arg) {
; IS________OPM-NEXT: [[CALL:%.*]] = call i32 @foo(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[STACK]])
; IS________OPM-NEXT: ret i32 [[CALL]]
;
; IS________NPM-LABEL: define {{[^@]+}}@bar
; IS________NPM-SAME: (i32 [[ARG:%.*]]) {
; IS________NPM-NEXT: entry:
; IS________NPM-NEXT: [[STACK:%.*]] = alloca i32, align 4
; IS________NPM-NEXT: store i32 [[ARG]], i32* [[STACK]], align 4
; IS________NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[STACK]], align 4
; IS________NPM-NEXT: [[CALL:%.*]] = call i32 @foo(i32 [[TMP0]])
; IS________NPM-NEXT: ret i32 [[CALL]]
; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@bar
; IS__TUNIT_NPM-SAME: (i32 [[ARG:%.*]]) {
; IS__TUNIT_NPM-NEXT: entry:
; IS__TUNIT_NPM-NEXT: [[STACK:%.*]] = alloca i32, align 4
; IS__TUNIT_NPM-NEXT: store i32 [[ARG]], i32* [[STACK]], align 4
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[STACK]], align 4
; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call i32 @foo(i32 [[TMP0]])
; IS__TUNIT_NPM-NEXT: ret i32 [[CALL]]
;
; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@bar
; IS__CGSCC_NPM-SAME: (i32 [[ARG:%.*]]) {
; IS__CGSCC_NPM-NEXT: entry:
; IS__CGSCC_NPM-NEXT: [[STACK:%.*]] = alloca i32, align 4
; IS__CGSCC_NPM-NEXT: store i32 [[ARG]], i32* [[STACK]], align 4
; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 @foo(i32 [[ARG]])
; IS__CGSCC_NPM-NEXT: ret i32 [[CALL]]
;
entry:
%stack = alloca i32

View File

@ -85,7 +85,7 @@ define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________OPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________OPM-NEXT: tail call void @t0_check(i32* align 256 [[A]], i64 noundef 99, i32* [[TMP0]])
; IS________OPM-NEXT: tail call void @t0_check(i32* align 256 [[A]], i64 noundef 99, i32* noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________OPM-NEXT: ret void
;
; IS________NPM-LABEL: define {{[^@]+}}@t0_callback_callee
@ -94,7 +94,7 @@ define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________NPM-NEXT: tail call void @t0_check(i32* align 256 [[A]], i64 noundef 99, i32* [[TMP0]])
; IS________NPM-NEXT: tail call void @t0_check(i32* align 256 [[A]], i64 noundef 99, i32* noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________NPM-NEXT: ret void
;
entry:
@ -186,7 +186,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________OPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________OPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
; IS________OPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________OPM-NEXT: ret void
;
; IS________NPM: Function Attrs: nosync
@ -196,7 +196,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________NPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
; IS________NPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________NPM-NEXT: ret void
;
entry:
@ -287,7 +287,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________OPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________OPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
; IS________OPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________OPM-NEXT: ret void
;
; IS________NPM-LABEL: define {{[^@]+}}@t2_callback_callee
@ -296,7 +296,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________NPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
; IS________NPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________NPM-NEXT: ret void
;
entry:
@ -392,7 +392,7 @@ define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________OPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________OPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
; IS________OPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________OPM-NEXT: ret void
;
; IS________NPM-LABEL: define {{[^@]+}}@t3_callback_callee
@ -401,7 +401,7 @@ define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________NPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
; IS________NPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________NPM-NEXT: ret void
;
entry:

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM

View File

@ -159,8 +159,7 @@ define void @f(i32 %x) {
; NOT_CGSCC_NPM-NEXT: entry:
; NOT_CGSCC_NPM-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
; NOT_CGSCC_NPM-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4
; NOT_CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4
; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NOT_CGSCC_NPM: if.then:
; NOT_CGSCC_NPM-NEXT: br label [[IF_END]]
@ -172,7 +171,6 @@ define void @f(i32 %x) {
; IS__CGSCC_NPM-SAME: (i32 [[X:%.*]]) #[[ATTR0]] {
; IS__CGSCC_NPM-NEXT: entry:
; IS__CGSCC_NPM-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
; IS__CGSCC_NPM-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4
; IS__CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
; IS__CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; IS__CGSCC_NPM: if.then:

View File

@ -606,11 +606,6 @@ define i1 @keep_assume_1(i1 %arg) {
define i1 @drop_assume_1(i1 %arg) {
; CHECK: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn
; CHECK-LABEL: define {{[^@]+}}@drop_assume_1
; CHECK-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[STACK:%.*]] = alloca i1, align 1
; CHECK-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
; CHECK-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret i1 [[ARG]]
;
%stack = alloca i1
store i1 %arg, i1* %stack

View File

@ -3265,13 +3265,12 @@ define dso_local i32 @round_trip_malloc(i32 %x) {
; IS________OPM-NEXT: ret i32 [[TMP1]]
;
; IS________NPM-LABEL: define {{[^@]+}}@round_trip_malloc
; IS________NPM-SAME: (i32 [[X:%.*]]) {
; IS________NPM-SAME: (i32 returned [[X:%.*]]) {
; IS________NPM-NEXT: entry:
; IS________NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1
; IS________NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
; IS________NPM-NEXT: store i32 [[X]], i32* [[TMP1]], align 4
; IS________NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
; IS________NPM-NEXT: ret i32 [[TMP2]]
; IS________NPM-NEXT: ret i32 [[X]]
;
entry:
%call = call noalias i8* @malloc(i64 4) norecurse
@ -3327,7 +3326,7 @@ define dso_local i32 @conditional_malloc(i32 %x) {
; IS________OPM-NEXT: ret i32 [[TMP1]]
;
; IS________NPM-LABEL: define {{[^@]+}}@conditional_malloc
; IS________NPM-SAME: (i32 [[X:%.*]]) {
; IS________NPM-SAME: (i32 returned [[X:%.*]]) {
; IS________NPM-NEXT: entry:
; IS________NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1
; IS________NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
@ -3337,8 +3336,7 @@ define dso_local i32 @conditional_malloc(i32 %x) {
; IS________NPM-NEXT: store i32 [[X]], i32* [[TMP1]], align 4
; IS________NPM-NEXT: br label [[IF_END]]
; IS________NPM: if.end:
; IS________NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
; IS________NPM-NEXT: ret i32 [[TMP2]]
; IS________NPM-NEXT: ret i32 [[X]]
;
entry:
%call = call noalias i8* @malloc(i64 4) norecurse

View File

@ -4928,17 +4928,21 @@ entry:
; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float
; CHECK1-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { float*, float* }, align 8
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { float*, float*, float* }, align 8
; CHECK1-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4
; CHECK1-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
; CHECK1-NEXT: store float [[F]], float* [[F_ADDR]], align 4
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: store float [[F]], float* [[F_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
; CHECK1-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: store float* [[F_RELOADED]], float** [[GEP_F_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: store float* [[F_ADDR]], float** [[GEP_F_ADDR]], align 8
; CHECK1-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 2
; CHECK1-NEXT: store float* [[P]], float** [[GEP_P]], align 8
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float* }* [[STRUCTARG]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float*, float* }* [[STRUCTARG]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -4949,16 +4953,19 @@ entry:
;
;
; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-NEXT: omp.par.entry:
; CHECK1-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[LOADGEP_F_RELOADED:%.*]] = load float*, float** [[GEP_F_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[LOADGEP_F_ADDR:%.*]] = load float*, float** [[GEP_F_ADDR]], align 8
; CHECK1-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 2
; CHECK1-NEXT: [[LOADGEP_P:%.*]] = load float*, float** [[GEP_P]], align 8
; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TMP2:%.*]] = load float, float* [[LOADGEP_F_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK1: omp.par.region:
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -4967,9 +4974,9 @@ entry:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -4986,8 +4993,7 @@ entry:
; CHECK1: omp_region.body:
; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK1: seq.par.merged:
; CHECK1-NEXT: [[TMP4:%.*]] = load float, float* [[LOADGEP_F_ADDR]], align 4
; CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], 0x40091EB860000000
; CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000
; CHECK1-NEXT: store float [[ADD]], float* [[LOADGEP_P]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK1: omp.par.merged.split:
@ -5206,20 +5212,24 @@ entry:
; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use
; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
; CHECK1-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
; CHECK1-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK1-NEXT: store i32* [[B]], i32** [[GEP_B]], align 8
; CHECK1-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -5232,16 +5242,19 @@ entry:
;
;
; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-NEXT: omp.par.entry:
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
; CHECK1-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK1-NEXT: [[LOADGEP_B:%.*]] = load i32*, i32** [[GEP_B]], align 8
; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK1: omp.par.region:
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -5250,9 +5263,9 @@ entry:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -5269,9 +5282,8 @@ entry:
; CHECK1: omp_region.body:
; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK1: seq.par.merged:
; CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
; CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
; CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
; CHECK1-NEXT: store i32 [[ADD]], i32* [[LOADGEP_B]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK1: omp.par.merged.split:
@ -5387,19 +5399,23 @@ entry:
; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
; CHECK1-NEXT: [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
; CHECK1-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: store i32* [[CANCEL1_RELOADED]], i32** [[GEP_CANCEL1_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8
; CHECK1-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK1-NEXT: store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -5410,16 +5426,19 @@ entry:
;
;
; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-NEXT: omp.par.entry:
; CHECK1-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[LOADGEP_CANCEL1_RELOADED:%.*]] = load i32*, i32** [[GEP_CANCEL1_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8
; CHECK1-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK1-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8
; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK1: omp.par.region:
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -5428,9 +5447,9 @@ entry:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -5447,8 +5466,7 @@ entry:
; CHECK1: omp_region.body:
; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK1: seq.par.merged:
; CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_ADDR]], align 4
; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK1-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
; CHECK1-NEXT: store i32 [[LNOT_EXT]], i32* [[LOADGEP_CANCEL2_ADDR]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
@ -5568,21 +5586,25 @@ entry:
; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq
; CHECK1-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32*, i32* }, align 8
; CHECK1-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
; CHECK1-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK1-NEXT: store i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
; CHECK1-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK1-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 3
; CHECK1-NEXT: store i32* [[ADD1_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -5595,18 +5617,21 @@ entry:
;
;
; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-NEXT: omp.par.entry:
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
; CHECK1-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK1-NEXT: [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
; CHECK1-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK1-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 3
; CHECK1-NEXT: [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK1: omp.par.region:
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -5615,9 +5640,9 @@ entry:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -5627,9 +5652,9 @@ entry:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
; CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
; CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK1: omp_region.end4:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
@ -5646,9 +5671,8 @@ entry:
; CHECK1: omp_region.body5:
; CHECK1-NEXT: br label [[SEQ_PAR_MERGED2:%.*]]
; CHECK1: seq.par.merged2:
; CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP6]]
; CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]]
; CHECK1-NEXT: store i32 [[ADD1]], i32* [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK1: omp.par.merged.split.split.split:
@ -5659,8 +5683,7 @@ entry:
; CHECK1: omp_region.body:
; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK1: seq.par.merged:
; CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
; CHECK1-NEXT: store i32 [[ADD]], i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK1: omp.par.merged.split:
@ -5777,8 +5800,7 @@ entry:
; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
; CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
; CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1-NEXT: ret void
@ -6122,17 +6144,21 @@ entry:
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float
; CHECK2-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { float*, float* }, align 8
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { float*, float*, float* }, align 8
; CHECK2-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4
; CHECK2-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
; CHECK2-NEXT: store float [[F]], float* [[F_ADDR]], align 4
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: store float [[F]], float* [[F_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: store float* [[F_RELOADED]], float** [[GEP_F_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: store float* [[F_ADDR]], float** [[GEP_F_ADDR]], align 8
; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT: store float* [[P]], float** [[GEP_P]], align 8
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float* }* [[STRUCTARG]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float*, float* }* [[STRUCTARG]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -6143,16 +6169,19 @@ entry:
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT: omp.par.entry:
; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[LOADGEP_F_RELOADED:%.*]] = load float*, float** [[GEP_F_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[LOADGEP_F_ADDR:%.*]] = load float*, float** [[GEP_F_ADDR]], align 8
; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 2
; CHECK2-NEXT: [[LOADGEP_P:%.*]] = load float*, float** [[GEP_P]], align 8
; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TMP2:%.*]] = load float, float* [[LOADGEP_F_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK2: omp.par.region:
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -6161,9 +6190,9 @@ entry:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -6180,8 +6209,7 @@ entry:
; CHECK2: omp_region.body:
; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2: seq.par.merged:
; CHECK2-NEXT: [[TMP4:%.*]] = load float, float* [[LOADGEP_F_ADDR]], align 4
; CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], 0x40091EB860000000
; CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000
; CHECK2-NEXT: store float [[ADD]], float* [[LOADGEP_P]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2: omp.par.merged.split:
@ -6400,20 +6428,24 @@ entry:
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
; CHECK2-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT: store i32* [[B]], i32** [[GEP_B]], align 8
; CHECK2-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -6426,16 +6458,19 @@ entry:
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT: omp.par.entry:
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK2-NEXT: [[LOADGEP_B:%.*]] = load i32*, i32** [[GEP_B]], align 8
; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK2: omp.par.region:
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -6444,9 +6479,9 @@ entry:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -6463,9 +6498,8 @@ entry:
; CHECK2: omp_region.body:
; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2: seq.par.merged:
; CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
; CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
; CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
; CHECK2-NEXT: store i32 [[ADD]], i32* [[LOADGEP_B]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2: omp.par.merged.split:
@ -6581,19 +6615,23 @@ entry:
; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
; CHECK2-NEXT: [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: store i32* [[CANCEL1_RELOADED]], i32** [[GEP_CANCEL1_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8
; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT: store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -6604,16 +6642,19 @@ entry:
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT: omp.par.entry:
; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[LOADGEP_CANCEL1_RELOADED:%.*]] = load i32*, i32** [[GEP_CANCEL1_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8
; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK2-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8
; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK2: omp.par.region:
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -6622,9 +6663,9 @@ entry:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -6641,8 +6682,7 @@ entry:
; CHECK2: omp_region.body:
; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2: seq.par.merged:
; CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_ADDR]], align 4
; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK2-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
; CHECK2-NEXT: store i32 [[LNOT_EXT]], i32* [[LOADGEP_CANCEL2_ADDR]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
@ -6762,21 +6802,25 @@ entry:
; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq
; CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32*, i32* }, align 8
; CHECK2-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT: store i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 3
; CHECK2-NEXT: store i32* [[ADD1_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -6789,18 +6833,21 @@ entry:
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT: omp.par.entry:
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK2-NEXT: [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 3
; CHECK2-NEXT: [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK2: omp.par.region:
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -6809,9 +6856,9 @@ entry:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -6821,9 +6868,9 @@ entry:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
; CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
; CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK2: omp_region.end4:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
@ -6840,9 +6887,8 @@ entry:
; CHECK2: omp_region.body5:
; CHECK2-NEXT: br label [[SEQ_PAR_MERGED2:%.*]]
; CHECK2: seq.par.merged2:
; CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP6]]
; CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]]
; CHECK2-NEXT: store i32 [[ADD1]], i32* [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK2: omp.par.merged.split.split.split:
@ -6853,8 +6899,7 @@ entry:
; CHECK2: omp_region.body:
; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2: seq.par.merged:
; CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
; CHECK2-NEXT: store i32 [[ADD]], i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2: omp.par.merged.split:
@ -6971,8 +7016,7 @@ entry:
; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
; CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
; CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT: ret void