[Attributor] Remove broken and duplicated load simplification

We look through loads in the "generic value traversal" and we
consequently don't need to look through them again in AAValueSimplify*.
The test changes stem from the fact that we allowed any simplified
value, incl. non-dynamically unique ones, as long as the underlying
memory was an alloca. This doesn't seem to make sense as allocas do not
protect against dynamically non-unique values. We need to make the
unique check better rather than excluding allocas. That in mind, we can
remove a lot of code by simply relying on the generic value traversal
load look through.

To soften the blow some minor adjustments have been made that allow more
simplification through the now used scheme and some tests have been
given a `norecurse` for now.
This commit is contained in:
Johannes Doerfert 2022-03-15 12:34:28 -05:00
parent a8610d7523
commit 857bf306d7
11 changed files with 540 additions and 617 deletions

View File

@ -214,7 +214,7 @@ bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
return !C->isThreadDependent();
// TODO: Inspect and cache more complex instructions.
if (auto *CB = dyn_cast<CallBase>(&V))
return CB->getNumOperands() == 0 && !CB->mayHaveSideEffects() &&
return CB->arg_size() == 0 && !CB->mayHaveSideEffects() &&
!CB->mayReadFromMemory();
const Function *Scope = nullptr;
if (auto *I = dyn_cast<Instruction>(&V))
@ -388,15 +388,16 @@ getPotentialCopiesOfMemoryValue(Attributor &A, Ty &I,
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead()))
return true;
if (OnlyExact && !IsExact) {
if (IsLoad && Acc.isWrittenValueYetUndetermined())
return true;
if (OnlyExact && !IsExact &&
!isa_and_nonnull<UndefValue>(Acc.getWrittenValue())) {
LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst()
<< ", abort!\n");
return false;
}
if (IsLoad) {
assert(isa<LoadInst>(I) && "Expected load or store instruction only!");
if (Acc.isWrittenValueYetUndetermined())
return true;
if (!Acc.isWrittenValueUnknown()) {
NewCopies.push_back(Acc.getWrittenValue());
return true;

View File

@ -5389,76 +5389,6 @@ struct AAValueSimplifyImpl : AAValueSimplify {
SimplifiedAssociatedValue = &getAssociatedValue();
return AAValueSimplify::indicatePessimisticFixpoint();
}
static bool handleLoad(Attributor &A, const AbstractAttribute &AA,
LoadInst &L, function_ref<bool(Value &)> Union) {
auto UnionWrapper = [&](Value &V, Value &Obj) {
if (isa<AllocaInst>(Obj))
return Union(V);
if (!AA::isDynamicallyUnique(A, AA, V))
return false;
ValueToValueMapTy VMap;
if (!reproduceValue(A, AA, V, *L.getType(), &L, /* CheckOnly */ true,
VMap))
return false;
return Union(V);
};
Value &Ptr = *L.getPointerOperand();
SmallVector<Value *, 8> Objects;
bool UsedAssumedInformation = false;
if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, AA, &L,
UsedAssumedInformation))
return false;
const auto *TLI =
A.getInfoCache().getTargetLibraryInfoForFunction(*L.getFunction());
for (Value *Obj : Objects) {
LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n");
if (isa<UndefValue>(Obj))
continue;
if (isa<ConstantPointerNull>(Obj)) {
// A null pointer access can be undefined but any offset from null may
// be OK. We do not try to optimize the latter.
if (!NullPointerIsDefined(L.getFunction(),
Ptr.getType()->getPointerAddressSpace()) &&
A.getAssumedSimplified(Ptr, AA, UsedAssumedInformation) == Obj)
continue;
return false;
}
Constant *InitialVal = AA::getInitialValueForObj(*Obj, *L.getType(), TLI);
if (!InitialVal || !Union(*InitialVal))
return false;
LLVM_DEBUG(dbgs() << "Underlying object amenable to load-store "
"propagation, checking accesses next.\n");
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n");
if (Acc.isWrittenValueYetUndetermined())
return true;
Value *Content = Acc.getWrittenValue();
if (!Content)
return false;
Value *CastedContent =
AA::getWithType(*Content, *AA.getAssociatedType());
if (!CastedContent)
return false;
if (IsExact)
return UnionWrapper(*CastedContent, *Obj);
if (auto *C = dyn_cast<Constant>(CastedContent))
if (C->isNullValue() || C->isAllOnesValue() || isa<UndefValue>(C))
return UnionWrapper(*CastedContent, *Obj);
return false;
};
auto &PI = A.getAAFor<AAPointerInfo>(AA, IRPosition::value(*Obj),
DepClassTy::REQUIRED);
if (!PI.forallInterferingAccesses(A, AA, L, CheckAccess))
return false;
}
return true;
}
};
struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
@ -5685,15 +5615,6 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
return true;
}
bool updateWithLoad(Attributor &A, LoadInst &L) {
auto Union = [&](Value &V) {
SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
SimplifiedAssociatedValue, &V, L.getType());
return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
};
return handleLoad(A, *this, L, Union);
}
/// Use the generic, non-optimistic InstSimplfy functionality if we managed to
/// simplify any operand of the instruction \p I. Return true if successful,
/// in that case SimplifiedAssociatedValue will be updated.
@ -5757,9 +5678,6 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
if (!Stripped && this == &AA) {
if (auto *I = dyn_cast<Instruction>(&V)) {
if (auto *LI = dyn_cast<LoadInst>(&V))
if (updateWithLoad(A, *LI))
return true;
if (auto *Cmp = dyn_cast<CmpInst>(&V))
if (handleCmp(A, *Cmp))
return true;
@ -9253,30 +9171,6 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
: ChangeStatus::CHANGED;
}
ChangeStatus updateWithLoad(Attributor &A, LoadInst &L) {
if (!L.getType()->isIntegerTy())
return indicatePessimisticFixpoint();
auto Union = [&](Value &V) {
if (isa<UndefValue>(V)) {
unionAssumedWithUndef();
return true;
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(&V)) {
unionAssumed(CI->getValue());
return true;
}
return false;
};
auto AssumedBefore = getAssumed();
if (!AAValueSimplifyImpl::handleLoad(A, *this, L, Union))
return indicatePessimisticFixpoint();
return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
Value &V = getAssociatedValue();
@ -9297,9 +9191,6 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
if (auto *PHI = dyn_cast<PHINode>(I))
return updateWithPHINode(A, PHI);
if (auto *L = dyn_cast<LoadInst>(I))
return updateWithLoad(A, *L);
return indicatePessimisticFixpoint();
}

View File

@ -17,22 +17,14 @@ define i32 @bar(i32 %arg) {
; IS________OPM-NEXT: [[CALL:%.*]] = call i32 @foo(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[STACK]])
; IS________OPM-NEXT: ret i32 [[CALL]]
;
; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@bar
; IS__TUNIT_NPM-SAME: (i32 [[ARG:%.*]]) {
; IS__TUNIT_NPM-NEXT: entry:
; IS__TUNIT_NPM-NEXT: [[STACK:%.*]] = alloca i32, align 4
; IS__TUNIT_NPM-NEXT: store i32 [[ARG]], i32* [[STACK]], align 4
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[STACK]], align 4
; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call i32 @foo(i32 [[TMP0]])
; IS__TUNIT_NPM-NEXT: ret i32 [[CALL]]
;
; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@bar
; IS__CGSCC_NPM-SAME: (i32 returned [[ARG:%.*]]) {
; IS__CGSCC_NPM-NEXT: entry:
; IS__CGSCC_NPM-NEXT: [[STACK:%.*]] = alloca i32, align 4
; IS__CGSCC_NPM-NEXT: store i32 [[ARG]], i32* [[STACK]], align 4
; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 @foo(i32 [[ARG]])
; IS__CGSCC_NPM-NEXT: ret i32 [[ARG]]
; IS________NPM-LABEL: define {{[^@]+}}@bar
; IS________NPM-SAME: (i32 [[ARG:%.*]]) {
; IS________NPM-NEXT: entry:
; IS________NPM-NEXT: [[STACK:%.*]] = alloca i32, align 4
; IS________NPM-NEXT: store i32 [[ARG]], i32* [[STACK]], align 4
; IS________NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[STACK]], align 4
; IS________NPM-NEXT: [[CALL:%.*]] = call i32 @foo(i32 [[TMP0]])
; IS________NPM-NEXT: ret i32 [[CALL]]
;
entry:
%stack = alloca i32

View File

@ -85,7 +85,7 @@ define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________OPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________OPM-NEXT: tail call void @t0_check(i32* align 256 [[A]], i64 noundef 99, i32* noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________OPM-NEXT: tail call void @t0_check(i32* align 256 [[A]], i64 noundef 99, i32* [[TMP0]])
; IS________OPM-NEXT: ret void
;
; IS________NPM-LABEL: define {{[^@]+}}@t0_callback_callee
@ -94,7 +94,7 @@ define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________NPM-NEXT: tail call void @t0_check(i32* align 256 [[A]], i64 noundef 99, i32* noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________NPM-NEXT: tail call void @t0_check(i32* align 256 [[A]], i64 noundef 99, i32* [[TMP0]])
; IS________NPM-NEXT: ret void
;
entry:
@ -186,7 +186,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________OPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________OPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________OPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
; IS________OPM-NEXT: ret void
;
; IS________NPM: Function Attrs: nosync
@ -196,7 +196,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________NPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________NPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
; IS________NPM-NEXT: ret void
;
entry:
@ -287,7 +287,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________OPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________OPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________OPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
; IS________OPM-NEXT: ret void
;
; IS________NPM-LABEL: define {{[^@]+}}@t2_callback_callee
@ -296,7 +296,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________NPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________NPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
; IS________NPM-NEXT: ret void
;
entry:
@ -392,7 +392,7 @@ define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________OPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________OPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________OPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
; IS________OPM-NEXT: ret void
;
; IS________NPM-LABEL: define {{[^@]+}}@t3_callback_callee
@ -401,7 +401,7 @@ define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; IS________NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; IS________NPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture noundef nonnull align 32 dereferenceable(4) [[TMP0]])
; IS________NPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 noundef 99, i32* nocapture [[TMP0]])
; IS________NPM-NEXT: ret void
;
entry:

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM

View File

@ -159,7 +159,8 @@ define void @f(i32 %x) {
; NOT_CGSCC_NPM-NEXT: entry:
; NOT_CGSCC_NPM-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
; NOT_CGSCC_NPM-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4
; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
; NOT_CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4
; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NOT_CGSCC_NPM: if.then:
; NOT_CGSCC_NPM-NEXT: br label [[IF_END]]
@ -171,6 +172,7 @@ define void @f(i32 %x) {
; IS__CGSCC_NPM-SAME: (i32 [[X:%.*]]) #[[ATTR0]] {
; IS__CGSCC_NPM-NEXT: entry:
; IS__CGSCC_NPM-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
; IS__CGSCC_NPM-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4
; IS__CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
; IS__CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; IS__CGSCC_NPM: if.then:

View File

@ -570,7 +570,7 @@ define i8* @complicated_args_byval2() {
; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_byval2() {
; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT_X:%.*]], %struct.X* @S, i32 0, i32 0), align 8
; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i8* @test_byval2(i8* noalias nofree readnone "no-capture-maybe-returned" [[TMP1]])
; IS__CGSCC_NPM-NEXT: ret i8* [[TMP1]]
; IS__CGSCC_NPM-NEXT: ret i8* [[C]]
;
%c = call i8* @test_byval2(%struct.X* byval(%struct.X) @S)
ret i8* %c
@ -1247,8 +1247,8 @@ define i8 @memcpy_uses_store_caller(i8 %arg) {
declare i32 @speculatable() speculatable readnone
define i32 @test_speculatable_expr() {
; IS__TUNIT_OPM: Function Attrs: nosync readnone
define i32 @test_speculatable_expr() norecurse {
; IS__TUNIT_OPM: Function Attrs: norecurse nosync readnone
; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test_speculatable_expr
; IS__TUNIT_OPM-SAME: () #[[ATTR8:[0-9]+]] {
; IS__TUNIT_OPM-NEXT: [[STACK:%.*]] = alloca i32, align 4
@ -1258,7 +1258,7 @@ define i32 @test_speculatable_expr() {
; IS__TUNIT_OPM-NEXT: [[RSPEC:%.*]] = call i32 @ret_speculatable_expr(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[STACK]]) #[[ATTR14:[0-9]+]]
; IS__TUNIT_OPM-NEXT: ret i32 [[RSPEC]]
;
; IS__TUNIT_NPM: Function Attrs: nosync readnone
; IS__TUNIT_NPM: Function Attrs: norecurse nosync readnone
; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test_speculatable_expr
; IS__TUNIT_NPM-SAME: () #[[ATTR7:[0-9]+]] {
; IS__TUNIT_NPM-NEXT: [[STACK:%.*]] = alloca i32, align 4
@ -1269,7 +1269,7 @@ define i32 @test_speculatable_expr() {
; IS__TUNIT_NPM-NEXT: [[RSPEC:%.*]] = call i32 @ret_speculatable_expr(i32 [[TMP1]]) #[[ATTR13:[0-9]+]]
; IS__TUNIT_NPM-NEXT: ret i32 [[RSPEC]]
;
; IS__CGSCC_OPM: Function Attrs: nosync readnone
; IS__CGSCC_OPM: Function Attrs: norecurse nosync readnone
; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test_speculatable_expr
; IS__CGSCC_OPM-SAME: () #[[ATTR9:[0-9]+]] {
; IS__CGSCC_OPM-NEXT: [[STACK:%.*]] = alloca i32, align 4
@ -1278,7 +1278,7 @@ define i32 @test_speculatable_expr() {
; IS__CGSCC_OPM-NEXT: [[RSPEC:%.*]] = call i32 @ret_speculatable_expr(i32* noalias nocapture nofree noundef nonnull readnone align 4 dereferenceable(4) [[STACK]]) #[[ATTR16:[0-9]+]]
; IS__CGSCC_OPM-NEXT: ret i32 [[RSPEC]]
;
; IS__CGSCC_NPM: Function Attrs: nosync readnone
; IS__CGSCC_NPM: Function Attrs: norecurse nosync readnone
; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test_speculatable_expr
; IS__CGSCC_NPM-SAME: () #[[ATTR9:[0-9]+]] {
; IS__CGSCC_NPM-NEXT: [[STACK:%.*]] = alloca i32, align 4
@ -1353,7 +1353,7 @@ define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) {
; IS__TUNIT_OPM: attributes #[[ATTR5]] = { argmemonly nofree norecurse nosync nounwind writeonly }
; IS__TUNIT_OPM: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn }
; IS__TUNIT_OPM: attributes #[[ATTR7:[0-9]+]] = { readnone speculatable }
; IS__TUNIT_OPM: attributes #[[ATTR8]] = { nosync readnone }
; IS__TUNIT_OPM: attributes #[[ATTR8]] = { norecurse nosync readnone }
; IS__TUNIT_OPM: attributes #[[ATTR9]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
; IS__TUNIT_OPM: attributes #[[ATTR10:[0-9]+]] = { argmemonly nofree nounwind willreturn }
; IS__TUNIT_OPM: attributes #[[ATTR11]] = { nofree nosync nounwind readnone willreturn }
@ -1368,7 +1368,7 @@ define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) {
; IS__TUNIT_NPM: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn writeonly }
; IS__TUNIT_NPM: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn }
; IS__TUNIT_NPM: attributes #[[ATTR6:[0-9]+]] = { readnone speculatable }
; IS__TUNIT_NPM: attributes #[[ATTR7]] = { nosync readnone }
; IS__TUNIT_NPM: attributes #[[ATTR7]] = { norecurse nosync readnone }
; IS__TUNIT_NPM: attributes #[[ATTR8]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
; IS__TUNIT_NPM: attributes #[[ATTR9:[0-9]+]] = { argmemonly nofree nounwind willreturn }
; IS__TUNIT_NPM: attributes #[[ATTR10]] = { nofree nosync nounwind readnone willreturn }
@ -1385,7 +1385,7 @@ define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) {
; IS__CGSCC_OPM: attributes #[[ATTR6]] = { nofree nosync nounwind readnone willreturn }
; IS__CGSCC_OPM: attributes #[[ATTR7]] = { nofree nosync nounwind willreturn }
; IS__CGSCC_OPM: attributes #[[ATTR8:[0-9]+]] = { readnone speculatable }
; IS__CGSCC_OPM: attributes #[[ATTR9]] = { nosync readnone }
; IS__CGSCC_OPM: attributes #[[ATTR9]] = { norecurse nosync readnone }
; IS__CGSCC_OPM: attributes #[[ATTR10]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
; IS__CGSCC_OPM: attributes #[[ATTR11:[0-9]+]] = { argmemonly nofree nounwind willreturn }
; IS__CGSCC_OPM: attributes #[[ATTR12]] = { willreturn }
@ -1403,7 +1403,7 @@ define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) {
; IS__CGSCC_NPM: attributes #[[ATTR6]] = { nofree nosync nounwind readnone willreturn }
; IS__CGSCC_NPM: attributes #[[ATTR7]] = { nofree nosync nounwind willreturn }
; IS__CGSCC_NPM: attributes #[[ATTR8:[0-9]+]] = { readnone speculatable }
; IS__CGSCC_NPM: attributes #[[ATTR9]] = { nosync readnone }
; IS__CGSCC_NPM: attributes #[[ATTR9]] = { norecurse nosync readnone }
; IS__CGSCC_NPM: attributes #[[ATTR10]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
; IS__CGSCC_NPM: attributes #[[ATTR11:[0-9]+]] = { argmemonly nofree nounwind willreturn }
; IS__CGSCC_NPM: attributes #[[ATTR12]] = { willreturn }

View File

@ -166,7 +166,8 @@ define void @pos_priv_mem() {
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast i32 addrspace(5)* [[ARG]] to i32*
; CHECK-NEXT: store i32 [[B]], i32* [[ARGC]], align 4
; CHECK-NEXT: store i32 [[A]], i32* @PG1, align 4
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[LOC]], align 4
; CHECK-NEXT: store i32 [[V]], i32* @PG1, align 4
; CHECK-NEXT: ret void
;
%arg = load i32 addrspace(5)*, i32 addrspace(5)** @GPtr5

View File

@ -1637,7 +1637,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: if.then:
; AMDGPU-NEXT: br label [[RETURN:%.*]]
; AMDGPU: if.end:
; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1
; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
; AMDGPU-NEXT: br label [[RETURN]]
@ -2613,7 +2614,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: if.then:
; NVPTX-NEXT: br label [[RETURN:%.*]]
; NVPTX: if.end:
; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1
; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
; NVPTX-NEXT: br label [[RETURN]]
@ -3344,7 +3346,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED: if.then:
; AMDGPU-DISABLED-NEXT: br label [[RETURN:%.*]]
; AMDGPU-DISABLED: if.end:
; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: br label [[RETURN]]
@ -4045,7 +4048,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED: if.then:
; NVPTX-DISABLED-NEXT: br label [[RETURN:%.*]]
; NVPTX-DISABLED: if.end:
; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
; NVPTX-DISABLED-NEXT: br label [[RETURN]]

View File

@ -4928,21 +4928,17 @@ entry:
; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float
; CHECK1-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { float*, float*, float* }, align 8
; CHECK1-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { float*, float* }, align 8
; CHECK1-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
; CHECK1-NEXT: store float [[F]], float* [[F_ADDR]], align 4
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: store float [[F]], float* [[F_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
; CHECK1-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: store float* [[F_RELOADED]], float** [[GEP_F_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: store float* [[F_ADDR]], float** [[GEP_F_ADDR]], align 8
; CHECK1-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 2
; CHECK1-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: store float* [[P]], float** [[GEP_P]], align 8
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float*, float* }* [[STRUCTARG]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float* }* [[STRUCTARG]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -4953,19 +4949,16 @@ entry:
;
;
; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-NEXT: omp.par.entry:
; CHECK1-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[LOADGEP_F_RELOADED:%.*]] = load float*, float** [[GEP_F_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[LOADGEP_F_ADDR:%.*]] = load float*, float** [[GEP_F_ADDR]], align 8
; CHECK1-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 2
; CHECK1-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[LOADGEP_P:%.*]] = load float*, float** [[GEP_P]], align 8
; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TMP2:%.*]] = load float, float* [[LOADGEP_F_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK1: omp.par.region:
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -4974,9 +4967,9 @@ entry:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -4993,7 +4986,8 @@ entry:
; CHECK1: omp_region.body:
; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK1: seq.par.merged:
; CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000
; CHECK1-NEXT: [[TMP4:%.*]] = load float, float* [[LOADGEP_F_ADDR]], align 4
; CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], 0x40091EB860000000
; CHECK1-NEXT: store float [[ADD]], float* [[LOADGEP_P]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK1: omp.par.merged.split:
@ -5212,24 +5206,20 @@ entry:
; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use
; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
; CHECK1-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
; CHECK1-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
; CHECK1-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK1-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: store i32* [[B]], i32** [[GEP_B]], align 8
; CHECK1-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -5242,19 +5232,16 @@ entry:
;
;
; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-NEXT: omp.par.entry:
; CHECK1-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
; CHECK1-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK1-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[LOADGEP_B:%.*]] = load i32*, i32** [[GEP_B]], align 8
; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK1: omp.par.region:
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -5263,9 +5250,9 @@ entry:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -5282,8 +5269,9 @@ entry:
; CHECK1: omp_region.body:
; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK1: seq.par.merged:
; CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
; CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
; CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
; CHECK1-NEXT: store i32 [[ADD]], i32* [[LOADGEP_B]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK1: omp.par.merged.split:
@ -5399,23 +5387,19 @@ entry:
; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
; CHECK1-NEXT: [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
; CHECK1-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
; CHECK1-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: store i32* [[CANCEL1_RELOADED]], i32** [[GEP_CANCEL1_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8
; CHECK1-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK1-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -5426,19 +5410,16 @@ entry:
;
;
; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-NEXT: omp.par.entry:
; CHECK1-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[LOADGEP_CANCEL1_RELOADED:%.*]] = load i32*, i32** [[GEP_CANCEL1_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8
; CHECK1-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK1-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8
; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK1: omp.par.region:
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -5447,9 +5428,9 @@ entry:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -5466,7 +5447,8 @@ entry:
; CHECK1: omp_region.body:
; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK1: seq.par.merged:
; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_ADDR]], align 4
; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK1-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
; CHECK1-NEXT: store i32 [[LNOT_EXT]], i32* [[LOADGEP_CANCEL2_ADDR]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
@ -5586,25 +5568,21 @@ entry:
; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq
; CHECK1-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32*, i32* }, align 8
; CHECK1-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
; CHECK1-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
; CHECK1-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
; CHECK1-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK1-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK1-NEXT: store i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
; CHECK1-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 3
; CHECK1-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK1-NEXT: store i32* [[ADD1_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -5617,21 +5595,18 @@ entry:
;
;
; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK1-NEXT: omp.par.entry:
; CHECK1-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK1-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
; CHECK1-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK1-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK1-NEXT: [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
; CHECK1-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 3
; CHECK1-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK1-NEXT: [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK1: omp.par.region:
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -5640,9 +5615,9 @@ entry:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -5652,9 +5627,9 @@ entry:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
; CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
; CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK1: omp_region.end4:
; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
@ -5671,8 +5646,9 @@ entry:
; CHECK1: omp_region.body5:
; CHECK1-NEXT: br label [[SEQ_PAR_MERGED2:%.*]]
; CHECK1: seq.par.merged2:
; CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]]
; CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP6]]
; CHECK1-NEXT: store i32 [[ADD1]], i32* [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK1: omp.par.merged.split.split.split:
@ -5683,7 +5659,8 @@ entry:
; CHECK1: omp_region.body:
; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK1: seq.par.merged:
; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
; CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
; CHECK1-NEXT: store i32 [[ADD]], i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK1: omp.par.merged.split:
@ -5800,7 +5777,8 @@ entry:
; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
; CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1-NEXT: ret void
@ -6144,21 +6122,17 @@ entry:
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float
; CHECK2-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { float*, float*, float* }, align 8
; CHECK2-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { float*, float* }, align 8
; CHECK2-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
; CHECK2-NEXT: store float [[F]], float* [[F_ADDR]], align 4
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: store float [[F]], float* [[F_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
; CHECK2-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: store float* [[F_RELOADED]], float** [[GEP_F_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: store float* [[F_ADDR]], float** [[GEP_F_ADDR]], align 8
; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: store float* [[P]], float** [[GEP_P]], align 8
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float*, float* }* [[STRUCTARG]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float* }* [[STRUCTARG]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -6169,19 +6143,16 @@ entry:
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT: omp.par.entry:
; CHECK2-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[LOADGEP_F_RELOADED:%.*]] = load float*, float** [[GEP_F_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[LOADGEP_F_ADDR:%.*]] = load float*, float** [[GEP_F_ADDR]], align 8
; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 2
; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float* }, { float*, float* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[LOADGEP_P:%.*]] = load float*, float** [[GEP_P]], align 8
; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TMP2:%.*]] = load float, float* [[LOADGEP_F_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK2: omp.par.region:
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -6190,9 +6161,9 @@ entry:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -6209,7 +6180,8 @@ entry:
; CHECK2: omp_region.body:
; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2: seq.par.merged:
; CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000
; CHECK2-NEXT: [[TMP4:%.*]] = load float, float* [[LOADGEP_F_ADDR]], align 4
; CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], 0x40091EB860000000
; CHECK2-NEXT: store float [[ADD]], float* [[LOADGEP_P]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2: omp.par.merged.split:
@ -6428,24 +6400,20 @@ entry:
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
; CHECK2-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: store i32* [[B]], i32** [[GEP_B]], align 8
; CHECK2-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -6458,19 +6426,16 @@ entry:
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT: omp.par.entry:
; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[LOADGEP_B:%.*]] = load i32*, i32** [[GEP_B]], align 8
; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK2: omp.par.region:
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -6479,9 +6444,9 @@ entry:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -6498,8 +6463,9 @@ entry:
; CHECK2: omp_region.body:
; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2: seq.par.merged:
; CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
; CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
; CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
; CHECK2-NEXT: store i32 [[ADD]], i32* [[LOADGEP_B]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2: omp.par.merged.split:
@ -6615,23 +6581,19 @@ entry:
; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
; CHECK2-NEXT: [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
; CHECK2-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
; CHECK2-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: store i32* [[CANCEL1_RELOADED]], i32** [[GEP_CANCEL1_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8
; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -6642,19 +6604,16 @@ entry:
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT: omp.par.entry:
; CHECK2-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[LOADGEP_CANCEL1_RELOADED:%.*]] = load i32*, i32** [[GEP_CANCEL1_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8
; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8
; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK2: omp.par.region:
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -6663,9 +6622,9 @@ entry:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -6682,7 +6641,8 @@ entry:
; CHECK2: omp_region.body:
; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2: seq.par.merged:
; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_ADDR]], align 4
; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK2-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
; CHECK2-NEXT: store i32 [[LNOT_EXT]], i32* [[LOADGEP_CANCEL2_ADDR]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
@ -6802,25 +6762,21 @@ entry:
; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq
; CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32*, i32* }, align 8
; CHECK2-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
; CHECK2-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT: store i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 3
; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT: store i32* [[ADD1_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@ -6833,21 +6789,18 @@ entry:
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT: omp.par.entry:
; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
; CHECK2-NEXT: [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 3
; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
; CHECK2-NEXT: [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
; CHECK2: omp.par.region:
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
@ -6856,9 +6809,9 @@ entry:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@ -6868,9 +6821,9 @@ entry:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
; CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
; CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK2: omp_region.end4:
; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
@ -6887,8 +6840,9 @@ entry:
; CHECK2: omp_region.body5:
; CHECK2-NEXT: br label [[SEQ_PAR_MERGED2:%.*]]
; CHECK2: seq.par.merged2:
; CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]]
; CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP6]]
; CHECK2-NEXT: store i32 [[ADD1]], i32* [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK2: omp.par.merged.split.split.split:
@ -6899,7 +6853,8 @@ entry:
; CHECK2: omp_region.body:
; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2: seq.par.merged:
; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
; CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
; CHECK2-NEXT: store i32 [[ADD]], i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2: omp.par.merged.split:
@ -7016,7 +6971,8 @@ entry:
; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
; CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT: ret void