forked from OSchip/llvm-project
Revert "[OpenMP][OpenMPIRBuilder] Implement loop unrolling."
Breaks build with -DBUILD_SHARED_LIBS=ON
```
CMake Error: The inter-target dependency graph contains the following strongly connected component (cycle):
"LLVMFrontendOpenMP" of type SHARED_LIBRARY
depends on "LLVMPasses" (weak)
"LLVMipo" of type SHARED_LIBRARY
depends on "LLVMFrontendOpenMP" (weak)
"LLVMCoroutines" of type SHARED_LIBRARY
depends on "LLVMipo" (weak)
"LLVMPasses" of type SHARED_LIBRARY
depends on "LLVMCoroutines" (weak)
depends on "LLVMipo" (weak)
At least one of these targets is not a STATIC_LIBRARY. Cyclic dependencies are allowed only among static libraries.
CMake Generate step failed. Build files cannot be regenerated correctly.
```
This reverts commit 707ce34b06.
This commit is contained in:
parent
f5b997e6b7
commit
50634deaa5
|
@ -10572,11 +10572,6 @@ public:
|
|||
/// an OpenMP loop directive.
|
||||
StmtResult ActOnOpenMPCanonicalLoop(Stmt *AStmt);
|
||||
|
||||
/// Process a canonical OpenMP loop nest that can either be a canonical
|
||||
/// literal loop (ForStmt or CXXForRangeStmt), or the generated loop of an
|
||||
/// OpenMP loop transformation construct.
|
||||
StmtResult ActOnOpenMPLoopnest(Stmt *AStmt);
|
||||
|
||||
/// End of OpenMP region.
|
||||
///
|
||||
/// \param S Statement associated with the current OpenMP region.
|
||||
|
|
|
@ -1951,27 +1951,11 @@ llvm::CanonicalLoopInfo *
|
|||
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
|
||||
assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
|
||||
|
||||
// The caller is processing the loop-associated directive processing the \p
|
||||
// Depth loops nested in \p S. Put the previous pending loop-associated
|
||||
// directive to the stack. If the current loop-associated directive is a loop
|
||||
// transformation directive, it will push its generated loops onto the stack
|
||||
// such that together with the loops left here they form the combined loop
|
||||
// nest for the parent loop-associated directive.
|
||||
int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
|
||||
ExpectedOMPLoopDepth = Depth;
|
||||
|
||||
EmitStmt(S);
|
||||
assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
|
||||
|
||||
// The last added loop is the outermost one.
|
||||
llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
|
||||
|
||||
// Pop the \p Depth loops requested by the call from that stack and restore
|
||||
// the previous context.
|
||||
OMPLoopNestStack.set_size(OMPLoopNestStack.size() - Depth);
|
||||
ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
|
||||
|
||||
return Result;
|
||||
return OMPLoopNestStack.back();
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
|
||||
|
@ -2601,46 +2585,6 @@ void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
|
|||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
|
||||
bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
|
||||
|
||||
if (UseOMPIRBuilder) {
|
||||
auto DL = SourceLocToDebugLoc(S.getBeginLoc());
|
||||
const Stmt *Inner = S.getRawStmt();
|
||||
|
||||
// Consume nested loop. Clear the entire remaining loop stack because a
|
||||
// fully unrolled loop is non-transformable. For partial unrolling the
|
||||
// generated outer loop is pushed back to the stack.
|
||||
llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
|
||||
OMPLoopNestStack.clear();
|
||||
|
||||
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
|
||||
|
||||
bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
|
||||
llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
|
||||
|
||||
if (S.hasClausesOfKind<OMPFullClause>()) {
|
||||
assert(ExpectedOMPLoopDepth == 0);
|
||||
OMPBuilder.unrollLoopFull(DL, CLI);
|
||||
} else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
|
||||
uint64_t Factor = 0;
|
||||
if (Expr *FactorExpr = PartialClause->getFactor()) {
|
||||
Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
|
||||
assert(Factor >= 1 && "Only positive factors are valid");
|
||||
}
|
||||
OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
|
||||
NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
|
||||
} else {
|
||||
OMPBuilder.unrollLoopHeuristic(DL, CLI);
|
||||
}
|
||||
|
||||
assert((!NeedsUnrolledCLI || UnrolledCLI) &&
|
||||
"NeedsUnrolledCLI implies UnrolledCLI to be set");
|
||||
if (UnrolledCLI)
|
||||
OMPLoopNestStack.push_back(UnrolledCLI);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// This function is only called if the unrolled loop is not consumed by any
|
||||
// other loop-associated construct. Such a loop-associated construct will have
|
||||
// used the transformed AST.
|
||||
|
|
|
@ -291,10 +291,6 @@ public:
|
|||
/// nest would extend.
|
||||
SmallVector<llvm::CanonicalLoopInfo *, 4> OMPLoopNestStack;
|
||||
|
||||
/// Number of nested loop to be consumed by the last surrounding
|
||||
/// loop-associated directive.
|
||||
int ExpectedOMPLoopDepth = 0;
|
||||
|
||||
// CodeGen lambda for loops and support for ordered clause
|
||||
typedef llvm::function_ref<void(CodeGenFunction &, const OMPLoopDirective &,
|
||||
JumpDest)>
|
||||
|
|
|
@ -2562,7 +2562,8 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) {
|
|||
|
||||
if (AssociatedStmt.isUsable() && isOpenMPLoopDirective(DKind) &&
|
||||
getLangOpts().OpenMPIRBuilder)
|
||||
AssociatedStmt = Actions.ActOnOpenMPLoopnest(AssociatedStmt.get());
|
||||
AssociatedStmt =
|
||||
Actions.ActOnOpenMPCanonicalLoop(AssociatedStmt.get());
|
||||
}
|
||||
AssociatedStmt = Actions.ActOnOpenMPRegionEnd(AssociatedStmt, Clauses);
|
||||
} else if (DKind == OMPD_target_update || DKind == OMPD_target_enter_data ||
|
||||
|
|
|
@ -5573,19 +5573,6 @@ StmtResult Sema::ActOnOpenMPCanonicalLoop(Stmt *AStmt) {
|
|||
LoopVarFunc, LVRef);
|
||||
}
|
||||
|
||||
StmtResult Sema::ActOnOpenMPLoopnest(Stmt *AStmt) {
|
||||
// Handle a literal loop.
|
||||
if (isa<ForStmt>(AStmt) || isa<CXXForRangeStmt>(AStmt))
|
||||
return ActOnOpenMPCanonicalLoop(AStmt);
|
||||
|
||||
// If not a literal loop, it must be the result of a loop transformation.
|
||||
OMPExecutableDirective *LoopTransform = cast<OMPExecutableDirective>(AStmt);
|
||||
assert(
|
||||
isOpenMPLoopTransformationDirective(LoopTransform->getDirectiveKind()) &&
|
||||
"Loop transformation directive expected");
|
||||
return LoopTransform;
|
||||
}
|
||||
|
||||
static ExprResult buildUserDefinedMapperRef(Sema &SemaRef, Scope *S,
|
||||
CXXScopeSpec &MapperIdScopeSpec,
|
||||
const DeclarationNameInfo &MapperId,
|
||||
|
|
|
@ -1,153 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
||||
// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@unroll_full(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4
|
||||
// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 0, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]])
|
||||
// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_BODY]]:
|
||||
// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]])
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]]
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]]
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]]
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]]
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4
|
||||
// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]]
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]]
|
||||
// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_AFTER]]:
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
void unroll_full(float *a, float *b, float *c, float *d) {
|
||||
#pragma omp unroll full
|
||||
for (int i = 0; i < 2; i++) {
|
||||
a[i] = b[i] * c[i] * d[i];
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8
|
||||
// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]]
|
||||
// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_TRUE]]:
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
|
||||
// CHECK-NEXT: br label %[[COND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_FALSE]]:
|
||||
// CHECK-NEXT: br label %[[COND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_END]]:
|
||||
// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt.1(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8
|
||||
// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]]
|
||||
// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]]
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
|
||||
// CHECK: ![[META2:[0-9]+]] =
|
||||
// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
|
||||
// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
|
||||
// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.full"}
|
|
@ -1,153 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
||||
// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@unroll_heuristic(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4
|
||||
// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 0, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]])
|
||||
// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_BODY]]:
|
||||
// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]])
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]]
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]]
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]]
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]]
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4
|
||||
// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]]
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]]
|
||||
// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_AFTER]]:
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
void unroll_heuristic(float *a, float *b, float *c, float *d) {
|
||||
#pragma omp unroll
|
||||
for (int i = 0; i < 128; i++) {
|
||||
a[i] = b[i] * c[i] * d[i];
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8
|
||||
// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: store i32 128, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]]
|
||||
// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_TRUE]]:
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
|
||||
// CHECK-NEXT: br label %[[COND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_FALSE]]:
|
||||
// CHECK-NEXT: br label %[[COND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_END]]:
|
||||
// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt.1(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8
|
||||
// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]]
|
||||
// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]]
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
|
||||
// CHECK: ![[META2:[0-9]+]] =
|
||||
// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]]}
|
||||
// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
|
|
@ -1,153 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
||||
// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@unroll_partial_factor(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4
|
||||
// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 0, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]])
|
||||
// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_BODY]]:
|
||||
// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]])
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]]
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]]
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]]
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]]
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4
|
||||
// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]]
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]]
|
||||
// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_AFTER]]:
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
void unroll_partial_factor(float *a, float *b, float *c, float *d) {
|
||||
#pragma omp unroll partial(3)
|
||||
for (int i = 0; i < 2; i++) {
|
||||
a[i] = b[i] * c[i] * d[i];
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8
|
||||
// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]]
|
||||
// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_TRUE]]:
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
|
||||
// CHECK-NEXT: br label %[[COND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_FALSE]]:
|
||||
// CHECK-NEXT: br label %[[COND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_END]]:
|
||||
// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt.1(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8
|
||||
// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]]
|
||||
// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]]
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
|
||||
// CHECK: ![[META2:[0-9]+]] =
|
||||
// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
|
||||
// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
|
||||
// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 3}
|
|
@ -1,222 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
||||
// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_for(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[N_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4
|
||||
// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32 %[[N:.+]], i32* %[[N_ADDR]], align 4
|
||||
// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 0, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 1
|
||||
// CHECK-NEXT: store i32* %[[N_ADDR]], i32** %[[TMP1]], align 8
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[TMP2]], align 4
|
||||
// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]])
|
||||
// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]:
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = udiv i32 %[[DOTCOUNT]], 13
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = urem i32 %[[DOTCOUNT]], 13
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = icmp ne i32 %[[TMP5]], 0
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = zext i1 %[[TMP6]] to i32
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP4]], %[[TMP7]]
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]:
|
||||
// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1
|
||||
// CHECK-NEXT: store i32 %[[TMP8]], i32* %[[P_UPPERBOUND]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = sub i32 %[[TMP10]], %[[TMP9]]
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[TMP11]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP12]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP13:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP9]]
|
||||
// CHECK-NEXT: %[[TMP14:.+]] = icmp eq i32 %[[TMP13]], %[[OMP_FLOOR0_TRIPCOUNT]]
|
||||
// CHECK-NEXT: %[[TMP15:.+]] = select i1 %[[TMP14]], i32 %[[TMP5]], i32 13
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP15]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP16:.+]] = mul nuw i32 13, %[[TMP13]]
|
||||
// CHECK-NEXT: %[[TMP17:.+]] = add nuw i32 %[[TMP16]], %[[OMP_TILE0_IV]]
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_BODY]]:
|
||||
// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP17]], %struct.anon.0* %[[AGG_CAPTURED1]])
|
||||
// CHECK-NEXT: %[[TMP18:.+]] = load float*, float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP19]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP18]], i64 %[[IDXPROM]]
|
||||
// CHECK-NEXT: %[[TMP20:.+]] = load float, float* %[[ARRAYIDX]], align 4
|
||||
// CHECK-NEXT: %[[TMP21:.+]] = load float*, float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP22:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP22]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP21]], i64 %[[IDXPROM2]]
|
||||
// CHECK-NEXT: %[[TMP23:.+]] = load float, float* %[[ARRAYIDX3]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP20]], %[[TMP23]]
|
||||
// CHECK-NEXT: %[[TMP24:.+]] = load float*, float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP25:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP25]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP24]], i64 %[[IDXPROM4]]
|
||||
// CHECK-NEXT: %[[TMP26:.+]] = load float, float* %[[ARRAYIDX5]], align 4
|
||||
// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP26]]
|
||||
// CHECK-NEXT: %[[TMP27:.+]] = load float*, float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP28:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP28]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP27]], i64 %[[IDXPROM7]]
|
||||
// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]:
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]])
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
||||
// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]])
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_AFTER]]:
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// Test input: a worksharing-loop directive (`omp for`) stacked directly on top
// of `omp unroll partial(13)`. The loop runs while i < n, so its trip count is
// only known at run time. Each iteration stores b[i] * c[i] * d[i] into a[i].
// NOTE(review): the function name says "heuristic", yet the unroll factor is
// spelled out explicitly as 13 here -- confirm that this is intended.
void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) {
|
||||
#pragma omp for
|
||||
#pragma omp unroll partial(13)
|
||||
for (int i = 0; i < n; i++) {
|
||||
a[i] = b[i] * c[i] * d[i];
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8
|
||||
// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 1
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load i32*, i32** %[[TMP4]], align 8
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[TMP5]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP6]], i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP7]], %[[TMP8]]
|
||||
// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_TRUE]]:
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]]
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP11]]
|
||||
// CHECK-NEXT: br label %[[COND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_FALSE]]:
|
||||
// CHECK-NEXT: br label %[[COND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_END]]:
|
||||
// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP12]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt.1(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8
|
||||
// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]]
|
||||
// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]]
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
|
||||
// CHECK: ![[META2:[0-9]+]] =
|
||||
// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
|
||||
// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
|
||||
// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 13}
|
|
@ -1,200 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
||||
// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@unroll_partial_factor_for_collapse(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[M_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[DOTOMP_IV:.+]] = alloca i64, align 8
|
||||
// CHECK-NEXT: %[[TMP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[J:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i64, align 8
|
||||
// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTUNROLLED_IV_J:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTOMP_LB:.+]] = alloca i64, align 8
|
||||
// CHECK-NEXT: %[[DOTOMP_UB:.+]] = alloca i64, align 8
|
||||
// CHECK-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i64, align 8
|
||||
// CHECK-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[I6:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTUNROLLED_IV_J7:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTUNROLL_INNER_IV_J:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32 %[[M:.+]], i32* %[[M_ADDR]], align 4
|
||||
// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load i32, i32* %[[M_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP0]], i32* %[[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK-NEXT: store i32 0, i32* %[[J]], align 4
|
||||
// CHECK-NEXT: %[[TMP1_1:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP1_1]], 0
|
||||
// CHECK-NEXT: %[[DIV:.+]] = sdiv i32 %[[SUB]], 1
|
||||
// CHECK-NEXT: %[[CONV:.+]] = sext i32 %[[DIV]] to i64
|
||||
// CHECK-NEXT: %[[MUL:.+]] = mul nsw i64 %[[CONV]], 2
|
||||
// CHECK-NEXT: %[[SUB3:.+]] = sub nsw i64 %[[MUL]], 1
|
||||
// CHECK-NEXT: store i64 %[[SUB3]], i64* %[[DOTCAPTURE_EXPR_2]], align 8
|
||||
// CHECK-NEXT: store i32 0, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: store i32 0, i32* %[[DOTUNROLLED_IV_J]], align 4
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 0, %[[TMP2]]
|
||||
// CHECK-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_PRECOND_THEN]]:
|
||||
// CHECK-NEXT: store i64 0, i64* %[[DOTOMP_LB]], align 8
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8
|
||||
// CHECK-NEXT: store i64 %[[TMP3]], i64* %[[DOTOMP_UB]], align 8
|
||||
// CHECK-NEXT: store i64 1, i64* %[[DOTOMP_STRIDE]], align 8
|
||||
// CHECK-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @3)
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1)
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8
|
||||
// CHECK-NEXT: %[[CMP8:.+]] = icmp sgt i64 %[[TMP4]], %[[TMP5]]
|
||||
// CHECK-NEXT: br i1 %[[CMP8]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_TRUE]]:
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8
|
||||
// CHECK-NEXT: br label %[[COND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_FALSE]]:
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
|
||||
// CHECK-NEXT: br label %[[COND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_END]]:
|
||||
// CHECK-NEXT: %[[COND:.+]] = phi i64 [ %[[TMP6]], %[[COND_TRUE]] ], [ %[[TMP7]], %[[COND_FALSE]] ]
|
||||
// CHECK-NEXT: store i64 %[[COND]], i64* %[[DOTOMP_UB]], align 8
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i64, i64* %[[DOTOMP_LB]], align 8
|
||||
// CHECK-NEXT: store i64 %[[TMP8]], i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_INNER_FOR_COND]]:
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
|
||||
// CHECK-NEXT: %[[CMP10:.+]] = icmp sle i64 %[[TMP9]], %[[TMP10]]
|
||||
// CHECK-NEXT: br i1 %[[CMP10]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_INNER_FOR_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: %[[DIV12:.+]] = sdiv i64 %[[TMP11]], 2
|
||||
// CHECK-NEXT: %[[MUL13:.+]] = mul nsw i64 %[[DIV12]], 1
|
||||
// CHECK-NEXT: %[[ADD:.+]] = add nsw i64 0, %[[MUL13]]
|
||||
// CHECK-NEXT: %[[CONV14:.+]] = trunc i64 %[[ADD]] to i32
|
||||
// CHECK-NEXT: store i32 %[[CONV14]], i32* %[[I6]], align 4
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: %[[TMP13:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: %[[DIV15:.+]] = sdiv i64 %[[TMP13]], 2
|
||||
// CHECK-NEXT: %[[MUL16:.+]] = mul nsw i64 %[[DIV15]], 2
|
||||
// CHECK-NEXT: %[[SUB17:.+]] = sub nsw i64 %[[TMP12]], %[[MUL16]]
|
||||
// CHECK-NEXT: %[[MUL18:.+]] = mul nsw i64 %[[SUB17]], 4
|
||||
// CHECK-NEXT: %[[ADD19:.+]] = add nsw i64 0, %[[MUL18]]
|
||||
// CHECK-NEXT: %[[CONV20:.+]] = trunc i64 %[[ADD19]] to i32
|
||||
// CHECK-NEXT: store i32 %[[CONV20]], i32* %[[DOTUNROLLED_IV_J7]], align 4
|
||||
// CHECK-NEXT: %[[TMP14:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP14]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4
|
||||
// CHECK-NEXT: br label %[[FOR_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[FOR_COND]]:
|
||||
// CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
|
||||
// CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4
|
||||
// CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 4
|
||||
// CHECK-NEXT: %[[CMP22:.+]] = icmp sle i32 %[[TMP15]], %[[ADD21]]
|
||||
// CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[LAND_RHS]]:
|
||||
// CHECK-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
|
||||
// CHECK-NEXT: %[[CMP24:.+]] = icmp sle i32 %[[TMP17]], 8
|
||||
// CHECK-NEXT: br label %[[LAND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[LAND_END]]:
|
||||
// CHECK-NEXT: %[[TMP18:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP24]], %[[LAND_RHS]] ]
|
||||
// CHECK-NEXT: br i1 %[[TMP18]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[FOR_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
|
||||
// CHECK-NEXT: %[[MUL26:.+]] = mul nsw i32 %[[TMP19]], 1
|
||||
// CHECK-NEXT: %[[ADD27:.+]] = add nsw i32 0, %[[MUL26]]
|
||||
// CHECK-NEXT: store i32 %[[ADD27]], i32* %[[J]], align 4
|
||||
// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I6]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP21]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM]]
|
||||
// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX]], align 4
|
||||
// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I6]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM28:.+]] = sext i32 %[[TMP24]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX29:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM28]]
|
||||
// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX29]], align 4
|
||||
// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[J]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM30:.+]] = sext i32 %[[TMP27]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX31:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM30]]
|
||||
// CHECK-NEXT: %[[TMP28:.+]] = load float, float* %[[ARRAYIDX31]], align 4
|
||||
// CHECK-NEXT: %[[MUL32:.+]] = fmul float %[[TMP25]], %[[TMP28]]
|
||||
// CHECK-NEXT: %[[ADD33:.+]] = fadd float %[[TMP22]], %[[MUL32]]
|
||||
// CHECK-NEXT: %[[TMP29:.+]] = load float*, float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP30:.+]] = load i32, i32* %[[I6]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM34:.+]] = sext i32 %[[TMP30]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX35:.+]] = getelementptr inbounds float, float* %[[TMP29]], i64 %[[IDXPROM34]]
|
||||
// CHECK-NEXT: %[[TMP31:.+]] = load float, float* %[[ARRAYIDX35]], align 4
|
||||
// CHECK-NEXT: %[[ADD36:.+]] = fadd float %[[TMP31]], %[[ADD33]]
|
||||
// CHECK-NEXT: store float %[[ADD36]], float* %[[ARRAYIDX35]], align 4
|
||||
// CHECK-NEXT: br label %[[FOR_INC:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[FOR_INC]]:
|
||||
// CHECK-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
|
||||
// CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP32]], 1
|
||||
// CHECK-NEXT: store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4
|
||||
// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP3:[0-9]+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[FOR_END]]:
|
||||
// CHECK-NEXT: br label %[[OMP_BODY_CONTINUE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_BODY_CONTINUE]]:
|
||||
// CHECK-NEXT: br label %[[OMP_INNER_FOR_INC:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_INNER_FOR_INC]]:
|
||||
// CHECK-NEXT: %[[TMP33:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: %[[ADD37:.+]] = add nsw i64 %[[TMP33]], 1
|
||||
// CHECK-NEXT: store i64 %[[ADD37]], i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_INNER_FOR_END]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_EXIT]]:
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM38:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @5)
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM38]])
|
||||
// CHECK-NEXT: br label %[[OMP_PRECOND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_PRECOND_END]]:
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM39:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @7)
|
||||
// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @6, i32 %[[OMP_GLOBAL_THREAD_NUM39]])
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
// Test input: `omp for collapse(2)` over a two-level loop nest whose inner
// loop carries `omp unroll partial(4)`. The outer loop runs while i < m (trip
// count known only at run time); the inner loop has a constant trip count of 8.
// Every inner iteration accumulates b[i] + c[i] * d[j] into a[i].
void unroll_partial_factor_for_collapse(int m, float *a, float *b, float *c, float *d) {
|
||||
#pragma omp for collapse(2)
|
||||
for (int i = 0; i < m; i++) {
|
||||
#pragma omp unroll partial(4)
|
||||
for (int j = 0; j < 8; j++) {
|
||||
a[i] += b[i] + c[i] * d[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
|
||||
// CHECK: ![[META2:[0-9]+]] =
|
||||
// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
|
||||
// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.mustprogress"}
|
||||
// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 4}
|
|
@ -1,152 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
||||
// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4
|
||||
// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 0, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]])
|
||||
// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_BODY]]:
|
||||
// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]])
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]]
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]]
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]]
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]]
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4
|
||||
// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]]
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]]
|
||||
// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_AFTER]]:
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
// Test input: element-wise product a[i] = b[i] * c[i] * d[i] over a two-iteration
// loop annotated with a standalone `unroll partial` directive that carries no
// explicit factor. The FileCheck assertions in this file expect a single
// canonical loop whose latch branch is tagged with `llvm.loop.unroll.enable`
// loop metadata.
// NOTE: the assertions pin the emitted IR instruction by instruction, so any
// change to this body requires regenerating them.
void unroll_partial_heuristic(float *a, float *b, float *c, float *d) {
|
||||
#pragma omp unroll partial
|
||||
for (int i = 0; i < 2; i++) {
|
||||
a[i] = b[i] * c[i] * d[i];
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8
|
||||
// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]]
|
||||
// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_TRUE]]:
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
|
||||
// CHECK-NEXT: br label %[[COND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_FALSE]]:
|
||||
// CHECK-NEXT: br label %[[COND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_END]]:
|
||||
// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt.1(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8
|
||||
// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]]
|
||||
// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]]
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
|
||||
// CHECK: ![[META2:[0-9]+]] =
|
||||
// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]]}
|
||||
// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
|
|
@ -1,243 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
||||
// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
// REQUIRES: x86-registered-target
|
||||
|
||||
// TODO: The unroll-factor heuristic might be able to use the information that the trip count is constant, but currently is not able to determine that.
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
double sind(double);
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_constant_for(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[E_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[OFFSET_ADDR:.+]] = alloca float, align 4
|
||||
// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4
|
||||
// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[E:.+]], float** %[[E_ADDR]], align 8
|
||||
// CHECK-NEXT: store float %[[OFFSET:.+]], float* %[[OFFSET_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 0, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]])
|
||||
// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]:
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 4
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 4
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]]
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]:
|
||||
// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1
|
||||
// CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]]
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]]
|
||||
// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]]
|
||||
// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 4
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP15:.+]] = mul nuw i32 4, %[[TMP12]]
|
||||
// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]]
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_BODY]]:
|
||||
// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP16]], %struct.anon.0* %[[AGG_CAPTURED1]])
|
||||
// CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM]]
|
||||
// CHECK-NEXT: %[[TMP19:.+]] = load float, float* %[[ARRAYIDX]], align 4
|
||||
// CHECK-NEXT: %[[CONV:.+]] = fpext float %[[TMP19]] to double
|
||||
// CHECK-NEXT: %[[CALL:.+]] = call double @sind(double %[[CONV]])
|
||||
// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM2]]
|
||||
// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX3]], align 4
|
||||
// CHECK-NEXT: %[[CONV4:.+]] = fpext float %[[TMP22]] to double
|
||||
// CHECK-NEXT: %[[MUL:.+]] = fmul double %[[CALL]], %[[CONV4]]
|
||||
// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM5:.+]] = sext i32 %[[TMP24]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX6:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM5]]
|
||||
// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX6]], align 4
|
||||
// CHECK-NEXT: %[[CONV7:.+]] = fpext float %[[TMP25]] to double
|
||||
// CHECK-NEXT: %[[MUL8:.+]] = fmul double %[[MUL]], %[[CONV7]]
|
||||
// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[E_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM9:.+]] = sext i32 %[[TMP27]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX10:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM9]]
|
||||
// CHECK-NEXT: %[[TMP28:.+]] = load float, float* %[[ARRAYIDX10]], align 4
|
||||
// CHECK-NEXT: %[[CONV11:.+]] = fpext float %[[TMP28]] to double
|
||||
// CHECK-NEXT: %[[MUL12:.+]] = fmul double %[[MUL8]], %[[CONV11]]
|
||||
// CHECK-NEXT: %[[TMP29:.+]] = load float, float* %[[OFFSET_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[CONV13:.+]] = fpext float %[[TMP29]] to double
|
||||
// CHECK-NEXT: %[[ADD:.+]] = fadd double %[[MUL12]], %[[CONV13]]
|
||||
// CHECK-NEXT: %[[TMP30:.+]] = load float*, float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP31:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM14:.+]] = sext i32 %[[TMP31]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX15:.+]] = getelementptr inbounds float, float* %[[TMP30]], i64 %[[IDXPROM14]]
|
||||
// CHECK-NEXT: %[[TMP32:.+]] = load float, float* %[[ARRAYIDX15]], align 4
|
||||
// CHECK-NEXT: %[[CONV16:.+]] = fpext float %[[TMP32]] to double
|
||||
// CHECK-NEXT: %[[ADD17:.+]] = fadd double %[[CONV16]], %[[ADD]]
|
||||
// CHECK-NEXT: %[[CONV18:.+]] = fptrunc double %[[ADD17]] to float
|
||||
// CHECK-NEXT: store float %[[CONV18]], float* %[[ARRAYIDX15]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]:
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]])
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM19:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
||||
// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM19]])
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_AFTER]]:
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// Test input: a 128-iteration loop that accumulates
// sind(b[i]) * c[i] * d[i] * e[i] + offset into a[i], annotated with
// `unroll partial` (no explicit factor) nested inside a worksharing `for`.
// The FileCheck assertions in this file expect the loop to be split into a
// floor loop and a tile loop using a factor of 4, with the floor loop bracketed
// by __kmpc_for_static_init_4u / __kmpc_for_static_fini and the tile loop tagged
// with `llvm.loop.unroll.enable` and `llvm.loop.unroll.count 4` metadata.
// NOTE: the assertions pin the emitted IR instruction by instruction, so any
// change to this body requires regenerating them.
void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float *d, float *e, float offset) {
|
||||
#pragma omp for
|
||||
#pragma omp unroll partial
|
||||
for (int i = 0; i < 128; i++) {
|
||||
a[i] += sind(b[i]) * c[i] * d[i] * e[i] + offset;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8
|
||||
// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: store i32 128, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]]
|
||||
// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_TRUE]]:
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
|
||||
// CHECK-NEXT: br label %[[COND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_FALSE]]:
|
||||
// CHECK-NEXT: br label %[[COND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_END]]:
|
||||
// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt.1(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8
|
||||
// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]]
|
||||
// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]]
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
|
||||
// CHECK: ![[META2:[0-9]+]] =
|
||||
// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
|
||||
// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
|
||||
// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 4}
|
|
@ -1,225 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
||||
// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
// REQUIRES: x86-registered-target
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
double sind(double);
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_for(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[M_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[E_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[OFFSET_ADDR:.+]] = alloca float, align 4
|
||||
// CHECK-NEXT: %[[DOTOMP_IV:.+]] = alloca i64, align 8
|
||||
// CHECK-NEXT: %[[TMP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[J:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i64, align 8
|
||||
// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTUNROLLED_IV_J:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTOMP_LB:.+]] = alloca i64, align 8
|
||||
// CHECK-NEXT: %[[DOTOMP_UB:.+]] = alloca i64, align 8
|
||||
// CHECK-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i64, align 8
|
||||
// CHECK-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[I6:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTUNROLLED_IV_J7:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTUNROLL_INNER_IV_J:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32 %[[M:.+]], i32* %[[M_ADDR]], align 4
|
||||
// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[E:.+]], float** %[[E_ADDR]], align 8
|
||||
// CHECK-NEXT: store float %[[OFFSET:.+]], float* %[[OFFSET_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load i32, i32* %[[M_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP0]], i32* %[[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK-NEXT: store i32 0, i32* %[[J]], align 4
|
||||
// CHECK-NEXT: %[[TMP1_1:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP1_1]], 0
|
||||
// CHECK-NEXT: %[[DIV:.+]] = sdiv i32 %[[SUB]], 1
|
||||
// CHECK-NEXT: %[[CONV:.+]] = sext i32 %[[DIV]] to i64
|
||||
// CHECK-NEXT: %[[MUL:.+]] = mul nsw i64 %[[CONV]], 4
|
||||
// CHECK-NEXT: %[[SUB3:.+]] = sub nsw i64 %[[MUL]], 1
|
||||
// CHECK-NEXT: store i64 %[[SUB3]], i64* %[[DOTCAPTURE_EXPR_2]], align 8
|
||||
// CHECK-NEXT: store i32 0, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: store i32 0, i32* %[[DOTUNROLLED_IV_J]], align 4
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 0, %[[TMP2]]
|
||||
// CHECK-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_PRECOND_THEN]]:
|
||||
// CHECK-NEXT: store i64 0, i64* %[[DOTOMP_LB]], align 8
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8
|
||||
// CHECK-NEXT: store i64 %[[TMP3]], i64* %[[DOTOMP_UB]], align 8
|
||||
// CHECK-NEXT: store i64 1, i64* %[[DOTOMP_STRIDE]], align 8
|
||||
// CHECK-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @3)
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1)
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8
|
||||
// CHECK-NEXT: %[[CMP8:.+]] = icmp sgt i64 %[[TMP4]], %[[TMP5]]
|
||||
// CHECK-NEXT: br i1 %[[CMP8]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_TRUE]]:
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8
|
||||
// CHECK-NEXT: br label %[[COND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_FALSE]]:
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
|
||||
// CHECK-NEXT: br label %[[COND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_END]]:
|
||||
// CHECK-NEXT: %[[COND:.+]] = phi i64 [ %[[TMP6]], %[[COND_TRUE]] ], [ %[[TMP7]], %[[COND_FALSE]] ]
|
||||
// CHECK-NEXT: store i64 %[[COND]], i64* %[[DOTOMP_UB]], align 8
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i64, i64* %[[DOTOMP_LB]], align 8
|
||||
// CHECK-NEXT: store i64 %[[TMP8]], i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_INNER_FOR_COND]]:
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
|
||||
// CHECK-NEXT: %[[CMP10:.+]] = icmp sle i64 %[[TMP9]], %[[TMP10]]
|
||||
// CHECK-NEXT: br i1 %[[CMP10]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_INNER_FOR_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: %[[DIV12:.+]] = sdiv i64 %[[TMP11]], 4
|
||||
// CHECK-NEXT: %[[MUL13:.+]] = mul nsw i64 %[[DIV12]], 1
|
||||
// CHECK-NEXT: %[[ADD:.+]] = add nsw i64 0, %[[MUL13]]
|
||||
// CHECK-NEXT: %[[CONV14:.+]] = trunc i64 %[[ADD]] to i32
|
||||
// CHECK-NEXT: store i32 %[[CONV14]], i32* %[[I6]], align 4
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: %[[TMP13:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: %[[DIV15:.+]] = sdiv i64 %[[TMP13]], 4
|
||||
// CHECK-NEXT: %[[MUL16:.+]] = mul nsw i64 %[[DIV15]], 4
|
||||
// CHECK-NEXT: %[[SUB17:.+]] = sub nsw i64 %[[TMP12]], %[[MUL16]]
|
||||
// CHECK-NEXT: %[[MUL18:.+]] = mul nsw i64 %[[SUB17]], 2
|
||||
// CHECK-NEXT: %[[ADD19:.+]] = add nsw i64 0, %[[MUL18]]
|
||||
// CHECK-NEXT: %[[CONV20:.+]] = trunc i64 %[[ADD19]] to i32
|
||||
// CHECK-NEXT: store i32 %[[CONV20]], i32* %[[DOTUNROLLED_IV_J7]], align 4
|
||||
// CHECK-NEXT: %[[TMP14:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP14]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4
|
||||
// CHECK-NEXT: br label %[[FOR_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[FOR_COND]]:
|
||||
// CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
|
||||
// CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4
|
||||
// CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 2
|
||||
// CHECK-NEXT: %[[CMP22:.+]] = icmp sle i32 %[[TMP15]], %[[ADD21]]
|
||||
// CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[LAND_RHS]]:
|
||||
// CHECK-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
|
||||
// CHECK-NEXT: %[[CMP24:.+]] = icmp sle i32 %[[TMP17]], 8
|
||||
// CHECK-NEXT: br label %[[LAND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[LAND_END]]:
|
||||
// CHECK-NEXT: %[[TMP18:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP24]], %[[LAND_RHS]] ]
|
||||
// CHECK-NEXT: br i1 %[[TMP18]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[FOR_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
|
||||
// CHECK-NEXT: %[[MUL26:.+]] = mul nsw i32 %[[TMP19]], 1
|
||||
// CHECK-NEXT: %[[ADD27:.+]] = add nsw i32 0, %[[MUL26]]
|
||||
// CHECK-NEXT: store i32 %[[ADD27]], i32* %[[J]], align 4
|
||||
// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I6]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP21]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM]]
|
||||
// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX]], align 4
|
||||
// CHECK-NEXT: %[[CONV28:.+]] = fpext float %[[TMP22]] to double
|
||||
// CHECK-NEXT: %[[CALL:.+]] = call double @sind(double %[[CONV28]])
|
||||
// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I6]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM29:.+]] = sext i32 %[[TMP24]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX30:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM29]]
|
||||
// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX30]], align 4
|
||||
// CHECK-NEXT: %[[CONV31:.+]] = fpext float %[[TMP25]] to double
|
||||
// CHECK-NEXT: %[[MUL32:.+]] = fmul double %[[CALL]], %[[CONV31]]
|
||||
// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I6]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM33:.+]] = sext i32 %[[TMP27]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX34:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM33]]
|
||||
// CHECK-NEXT: %[[TMP28:.+]] = load float, float* %[[ARRAYIDX34]], align 4
|
||||
// CHECK-NEXT: %[[CONV35:.+]] = fpext float %[[TMP28]] to double
|
||||
// CHECK-NEXT: %[[MUL36:.+]] = fmul double %[[MUL32]], %[[CONV35]]
|
||||
// CHECK-NEXT: %[[TMP29:.+]] = load float*, float** %[[E_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP30:.+]] = load i32, i32* %[[I6]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM37:.+]] = sext i32 %[[TMP30]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX38:.+]] = getelementptr inbounds float, float* %[[TMP29]], i64 %[[IDXPROM37]]
|
||||
// CHECK-NEXT: %[[TMP31:.+]] = load float, float* %[[ARRAYIDX38]], align 4
|
||||
// CHECK-NEXT: %[[CONV39:.+]] = fpext float %[[TMP31]] to double
|
||||
// CHECK-NEXT: %[[MUL40:.+]] = fmul double %[[MUL36]], %[[CONV39]]
|
||||
// CHECK-NEXT: %[[TMP32:.+]] = load float, float* %[[OFFSET_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[CONV41:.+]] = fpext float %[[TMP32]] to double
|
||||
// CHECK-NEXT: %[[ADD42:.+]] = fadd double %[[MUL40]], %[[CONV41]]
|
||||
// CHECK-NEXT: %[[TMP33:.+]] = load float*, float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP34:.+]] = load i32, i32* %[[I6]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM43:.+]] = sext i32 %[[TMP34]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX44:.+]] = getelementptr inbounds float, float* %[[TMP33]], i64 %[[IDXPROM43]]
|
||||
// CHECK-NEXT: %[[TMP35:.+]] = load float, float* %[[ARRAYIDX44]], align 4
|
||||
// CHECK-NEXT: %[[CONV45:.+]] = fpext float %[[TMP35]] to double
|
||||
// CHECK-NEXT: %[[ADD46:.+]] = fadd double %[[CONV45]], %[[ADD42]]
|
||||
// CHECK-NEXT: %[[CONV47:.+]] = fptrunc double %[[ADD46]] to float
|
||||
// CHECK-NEXT: store float %[[CONV47]], float* %[[ARRAYIDX44]], align 4
|
||||
// CHECK-NEXT: br label %[[FOR_INC:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[FOR_INC]]:
|
||||
// CHECK-NEXT: %[[TMP36:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
|
||||
// CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP36]], 1
|
||||
// CHECK-NEXT: store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4
|
||||
// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP3:[0-9]+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[FOR_END]]:
|
||||
// CHECK-NEXT: br label %[[OMP_BODY_CONTINUE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_BODY_CONTINUE]]:
|
||||
// CHECK-NEXT: br label %[[OMP_INNER_FOR_INC:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_INNER_FOR_INC]]:
|
||||
// CHECK-NEXT: %[[TMP37:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: %[[ADD48:.+]] = add nsw i64 %[[TMP37]], 1
|
||||
// CHECK-NEXT: store i64 %[[ADD48]], i64* %[[DOTOMP_IV]], align 8
|
||||
// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_INNER_FOR_END]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_EXIT]]:
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM49:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @5)
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM49]])
|
||||
// CHECK-NEXT: br label %[[OMP_PRECOND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_PRECOND_END]]:
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM50:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @7)
|
||||
// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @6, i32 %[[OMP_GLOBAL_THREAD_NUM50]])
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// Test input: a two-deep loop nest whose inner loop is partially unrolled.
// `#pragma omp for collapse(2)` precedes the outer `i` loop (0 <= i < m) and
// `#pragma omp unroll partial`, with no explicit factor, precedes the inner
// `j` loop (0 <= j < 8). Every iteration adds
// sind(b[i]) * c[i] * d[i] * e[i] + offset to a[i]; `j` itself is not read in
// the body.
// The FileCheck assertions around this function pin the IR emitted for it,
// so the statements and pragmas must stay exactly as written.
void unroll_partial_heuristic_for(int m, float *a, float *b, float *c, float *d, float *e, float offset) {
|
||||
#pragma omp for collapse(2)
|
||||
for (int i = 0; i < m; i++) {
|
||||
#pragma omp unroll partial
|
||||
for (int j = 0; j < 8; j++) {
|
||||
a[i] += sind(b[i]) * c[i] * d[i] * e[i] + offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
|
||||
// CHECK: ![[META2:[0-9]+]] =
|
||||
// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
|
||||
// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.mustprogress"}
|
||||
// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2}
|
|
@ -1,248 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
||||
// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
// REQUIRES: x86-registered-target
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// Prototype only: no definition of sind appears in this test file. It is
// called from the loop body of unroll_partial_heuristic_runtime_for below.
double sind(double);
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_runtime_for(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[N_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[E_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[OFFSET_ADDR:.+]] = alloca float, align 4
|
||||
// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4
|
||||
// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32 %[[N:.+]], i32* %[[N_ADDR]], align 4
|
||||
// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[E:.+]], float** %[[E_ADDR]], align 8
|
||||
// CHECK-NEXT: store float %[[OFFSET:.+]], float* %[[OFFSET_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 0, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 1
|
||||
// CHECK-NEXT: store i32* %[[N_ADDR]], i32** %[[TMP1]], align 8
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[TMP2]], align 4
|
||||
// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]])
|
||||
// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]:
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = udiv i32 %[[DOTCOUNT]], 4
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = urem i32 %[[DOTCOUNT]], 4
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = icmp ne i32 %[[TMP5]], 0
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = zext i1 %[[TMP6]] to i32
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP4]], %[[TMP7]]
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]:
|
||||
// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1
|
||||
// CHECK-NEXT: store i32 %[[TMP8]], i32* %[[P_UPPERBOUND]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = sub i32 %[[TMP10]], %[[TMP9]]
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[TMP11]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP12]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP13:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP9]]
|
||||
// CHECK-NEXT: %[[TMP14:.+]] = icmp eq i32 %[[TMP13]], %[[OMP_FLOOR0_TRIPCOUNT]]
|
||||
// CHECK-NEXT: %[[TMP15:.+]] = select i1 %[[TMP14]], i32 %[[TMP5]], i32 4
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP15]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP16:.+]] = mul nuw i32 4, %[[TMP13]]
|
||||
// CHECK-NEXT: %[[TMP17:.+]] = add nuw i32 %[[TMP16]], %[[OMP_TILE0_IV]]
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_BODY]]:
|
||||
// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP17]], %struct.anon.0* %[[AGG_CAPTURED1]])
|
||||
// CHECK-NEXT: %[[TMP18:.+]] = load float*, float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP19]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP18]], i64 %[[IDXPROM]]
|
||||
// CHECK-NEXT: %[[TMP20:.+]] = load float, float* %[[ARRAYIDX]], align 4
|
||||
// CHECK-NEXT: %[[CONV:.+]] = fpext float %[[TMP20]] to double
|
||||
// CHECK-NEXT: %[[CALL:.+]] = call double @sind(double %[[CONV]])
|
||||
// CHECK-NEXT: %[[TMP21:.+]] = load float*, float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP22:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP22]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP21]], i64 %[[IDXPROM2]]
|
||||
// CHECK-NEXT: %[[TMP23:.+]] = load float, float* %[[ARRAYIDX3]], align 4
|
||||
// CHECK-NEXT: %[[CONV4:.+]] = fpext float %[[TMP23]] to double
|
||||
// CHECK-NEXT: %[[MUL:.+]] = fmul double %[[CALL]], %[[CONV4]]
|
||||
// CHECK-NEXT: %[[TMP24:.+]] = load float*, float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP25:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM5:.+]] = sext i32 %[[TMP25]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX6:.+]] = getelementptr inbounds float, float* %[[TMP24]], i64 %[[IDXPROM5]]
|
||||
// CHECK-NEXT: %[[TMP26:.+]] = load float, float* %[[ARRAYIDX6]], align 4
|
||||
// CHECK-NEXT: %[[CONV7:.+]] = fpext float %[[TMP26]] to double
|
||||
// CHECK-NEXT: %[[MUL8:.+]] = fmul double %[[MUL]], %[[CONV7]]
|
||||
// CHECK-NEXT: %[[TMP27:.+]] = load float*, float** %[[E_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP28:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM9:.+]] = sext i32 %[[TMP28]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX10:.+]] = getelementptr inbounds float, float* %[[TMP27]], i64 %[[IDXPROM9]]
|
||||
// CHECK-NEXT: %[[TMP29:.+]] = load float, float* %[[ARRAYIDX10]], align 4
|
||||
// CHECK-NEXT: %[[CONV11:.+]] = fpext float %[[TMP29]] to double
|
||||
// CHECK-NEXT: %[[MUL12:.+]] = fmul double %[[MUL8]], %[[CONV11]]
|
||||
// CHECK-NEXT: %[[TMP30:.+]] = load float, float* %[[OFFSET_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[CONV13:.+]] = fpext float %[[TMP30]] to double
|
||||
// CHECK-NEXT: %[[ADD:.+]] = fadd double %[[MUL12]], %[[CONV13]]
|
||||
// CHECK-NEXT: %[[TMP31:.+]] = load float*, float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP32:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM14:.+]] = sext i32 %[[TMP32]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX15:.+]] = getelementptr inbounds float, float* %[[TMP31]], i64 %[[IDXPROM14]]
|
||||
// CHECK-NEXT: %[[TMP33:.+]] = load float, float* %[[ARRAYIDX15]], align 4
|
||||
// CHECK-NEXT: %[[CONV16:.+]] = fpext float %[[TMP33]] to double
|
||||
// CHECK-NEXT: %[[ADD17:.+]] = fadd double %[[CONV16]], %[[ADD]]
|
||||
// CHECK-NEXT: %[[CONV18:.+]] = fptrunc double %[[ADD17]] to float
|
||||
// CHECK-NEXT: store float %[[CONV18]], float* %[[ARRAYIDX15]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]:
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]])
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM19:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
||||
// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM19]])
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_AFTER]]:
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// Test input: a single loop over 0 <= i < n that carries both
// `#pragma omp for` and `#pragma omp unroll partial` (no explicit factor), so
// the worksharing construct is applied to the result of the unroll directive.
// Every iteration adds sind(b[i]) * c[i] * d[i] * e[i] + offset to a[i].
// The autogenerated FileCheck assertions in this file record the expected
// lowering: a floor loop and a tile loop built with a factor of 4, and
// llvm.loop.unroll.count 4 metadata attached to the tile loop's back edge.
// Keep the statements and pragmas exactly as written so those assertions
// continue to match.
void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, float *d, float *e, float offset) {
|
||||
#pragma omp for
|
||||
#pragma omp unroll partial
|
||||
for (int i = 0; i < n; i++) {
|
||||
a[i] += sind(b[i]) * c[i] * d[i] * e[i] + offset;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8
|
||||
// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 1
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load i32*, i32** %[[TMP4]], align 8
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[TMP5]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP6]], i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP7]], %[[TMP8]]
|
||||
// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_TRUE]]:
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]]
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP11]]
|
||||
// CHECK-NEXT: br label %[[COND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_FALSE]]:
|
||||
// CHECK-NEXT: br label %[[COND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_END]]:
|
||||
// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP12]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt.1(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8
|
||||
// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]]
|
||||
// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]]
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
|
||||
// CHECK: ![[META2:[0-9]+]] =
|
||||
// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
|
||||
// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
|
||||
// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 4}
|
|
@ -1,215 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
||||
// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@unroll_partial_factor_for(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4
|
||||
// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 0, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]])
|
||||
// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]:
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 2
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 2
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]]
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]:
|
||||
// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1
|
||||
// CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]]
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]]
|
||||
// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]]
|
||||
// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 2
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP15:.+]] = mul nuw i32 2, %[[TMP12]]
|
||||
// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]]
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_BODY]]:
|
||||
// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP16]], %struct.anon.0* %[[AGG_CAPTURED1]])
|
||||
// CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM]]
|
||||
// CHECK-NEXT: %[[TMP19:.+]] = load float, float* %[[ARRAYIDX]], align 4
|
||||
// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM2]]
|
||||
// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX3]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP19]], %[[TMP22]]
|
||||
// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP24]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM4]]
|
||||
// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX5]], align 4
|
||||
// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP25]]
|
||||
// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP27]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM7]]
|
||||
// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]:
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]])
|
||||
// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
||||
// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]])
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_AFTER]]:
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// Test input: a single loop over 0 <= i < 2 that carries `#pragma omp for`
// followed by `#pragma omp unroll partial(2)`, i.e. an explicit unroll factor
// of 2. Every iteration stores b[i] * c[i] * d[i] into a[i].
// The autogenerated FileCheck assertions in this file record the expected
// lowering: the trip count is divided by 2 to form a floor loop and a tile
// loop, and the tile loop's back edge carries llvm.loop.unroll.count 2
// metadata. Keep the statements and pragmas exactly as written so those
// assertions continue to match.
void unroll_partial_factor_for(float *a, float *b, float *c, float *d) {
|
||||
#pragma omp for
|
||||
#pragma omp unroll partial(2)
|
||||
for (int i = 0; i < 2; i++) {
|
||||
a[i] = b[i] * c[i] * d[i];
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8
|
||||
// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]]
|
||||
// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_TRUE]]:
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
|
||||
// CHECK-NEXT: br label %[[COND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_FALSE]]:
|
||||
// CHECK-NEXT: br label %[[COND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_END]]:
|
||||
// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt.1(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8
|
||||
// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]]
|
||||
// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]]
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
|
||||
// CHECK: ![[META2:[0-9]+]] =
|
||||
// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
|
||||
// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
|
||||
// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2}
|
|
@ -1,197 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
||||
// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@unroll_unroll_partial_heuristic(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
|
||||
// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8
|
||||
// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4
|
||||
// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 0, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]])
|
||||
// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]:
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 8
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 8
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]]
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[OMP_FLOOR0_TRIPCOUNT]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = icmp eq i32 %[[OMP_FLOOR0_IV]], %[[OMP_FLOOR0_TRIPCOUNT]]
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = select i1 %[[TMP7]], i32 %[[TMP4]], i32 8
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_HEADER]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_COND]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP8]]
|
||||
// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_BODY]]:
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = mul nuw i32 8, %[[OMP_FLOOR0_IV]]
|
||||
// CHECK-NEXT: %[[TMP10:.+]] = add nuw i32 %[[TMP9]], %[[OMP_TILE0_IV]]
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_BODY]]:
|
||||
// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP10]], %struct.anon.0* %[[AGG_CAPTURED1]])
|
||||
// CHECK-NEXT: %[[TMP11:.+]] = load float*, float** %[[B_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP12:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP12]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP11]], i64 %[[IDXPROM]]
|
||||
// CHECK-NEXT: %[[TMP13:.+]] = load float, float* %[[ARRAYIDX]], align 4
|
||||
// CHECK-NEXT: %[[TMP14:.+]] = load float*, float** %[[C_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP15]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP14]], i64 %[[IDXPROM2]]
|
||||
// CHECK-NEXT: %[[TMP16:.+]] = load float, float* %[[ARRAYIDX3]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP13]], %[[TMP16]]
|
||||
// CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[D_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP18]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM4]]
|
||||
// CHECK-NEXT: %[[TMP19:.+]] = load float, float* %[[ARRAYIDX5]], align 4
|
||||
// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP19]]
|
||||
// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[A_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4
|
||||
// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP21]] to i64
|
||||
// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM7]]
|
||||
// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_TILE0_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_INC]]:
|
||||
// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]], !llvm.loop ![[LOOP6:[0-9]+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_AFTER]]:
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) {
|
||||
#pragma omp unroll partial
|
||||
#pragma omp unroll partial
|
||||
for (int i = 0; i < 2; i++) {
|
||||
a[i] = b[i] * c[i] * d[i];
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8
|
||||
// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
|
||||
// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]]
|
||||
// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_TRUE]]:
|
||||
// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4
|
||||
// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
|
||||
// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
|
||||
// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
|
||||
// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
|
||||
// CHECK-NEXT: br label %[[COND_END:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_FALSE]]:
|
||||
// CHECK-NEXT: br label %[[COND_END]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[COND_END]]:
|
||||
// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
|
||||
// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@__captured_stmt.1(
|
||||
// CHECK-NEXT: [[ENTRY:.*]]:
|
||||
// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8
|
||||
// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
|
||||
// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0
|
||||
// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4
|
||||
// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4
|
||||
// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]]
|
||||
// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]]
|
||||
// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
|
||||
// CHECK: ![[META2:[0-9]+]] =
|
||||
// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
|
||||
// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
|
||||
// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 8}
|
||||
// CHECK: ![[LOOP6]] = distinct !{![[LOOP6]], ![[LOOPPROP4]]}
|
|
@ -1305,10 +1305,6 @@ bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name);
|
|||
llvm::Optional<int>
|
||||
getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name);
|
||||
|
||||
/// Find named metadata for a loop with an integer value. Return \p Default if
|
||||
/// not set.
|
||||
int getIntLoopAttribute(const Loop *TheLoop, StringRef Name, int Default = 0);
|
||||
|
||||
/// Find string metadata for loop
|
||||
///
|
||||
/// If it has a value (e.g. {"llvm.distribute", 1}), return the value as an
|
||||
|
|
|
@ -475,48 +475,6 @@ public:
|
|||
tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
|
||||
ArrayRef<Value *> TileSizes);
|
||||
|
||||
/// Fully unroll a loop.
|
||||
///
|
||||
/// Instead of unrolling the loop immediately (and duplicating its body
|
||||
/// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
|
||||
/// metadata.
|
||||
///
|
||||
/// \param DL Debug location for instructions added by unrolling.
|
||||
/// \param Loop The loop to unroll. The loop will be invalidated.
|
||||
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
|
||||
|
||||
/// Fully or partially unroll a loop. How the loop is unrolled is determined
|
||||
/// using LLVM's LoopUnrollPass.
|
||||
///
|
||||
/// \param DL Debug location for instructions added by unrolling.
|
||||
/// \param Loop The loop to unroll. The loop will be invalidated.
|
||||
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
|
||||
|
||||
/// Partially unroll a loop.
|
||||
///
|
||||
/// The CanonicalLoopInfo of the unrolled loop for use with chained
|
||||
/// loop-associated directives can be requested using \p UnrolledCLI. Not
|
||||
/// needing the CanonicalLoopInfo allows more efficient code generation by
|
||||
/// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
|
||||
/// A loop-associated directive applied to the unrolled loop needs to know the
|
||||
/// new trip count which means that if using a heuristically determined unroll
|
||||
/// factor (\p Factor == 0), that factor must be computed immediately. We are
|
||||
/// using the same logic as the LoopUnrollPass to derive the unroll factor,
|
||||
/// but which assumes that some canonicalization has taken place (e.g.
|
||||
/// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
|
||||
/// better when the unrolled loop's CanonicalLoopInfo is not needed.
|
||||
///
|
||||
/// \param DL Debug location for instructions added by unrolling.
|
||||
/// \param Loop The loop to unroll. The loop will be invalidated.
|
||||
/// \param Factor The factor to unroll the loop by. A factor of 0
|
||||
/// indicates that a heuristic should be used to determine
|
||||
/// the unroll-factor.
|
||||
/// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
|
||||
/// partially unrolled loop. Otherwise, uses loop metadata
|
||||
/// to defer unrolling to the LoopUnrollPass.
|
||||
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
|
||||
CanonicalLoopInfo **UnrolledCLI);
|
||||
|
||||
/// Generator for '#omp flush'
|
||||
///
|
||||
/// \param Loc The location where the flush directive was encountered
|
||||
|
|
|
@ -1102,11 +1102,6 @@ llvm::Optional<int> llvm::getOptionalIntLoopAttribute(const Loop *TheLoop,
|
|||
return IntMD->getSExtValue();
|
||||
}
|
||||
|
||||
int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name,
|
||||
int Default) {
|
||||
return getOptionalIntLoopAttribute(TheLoop, Name).getValueOr(Default);
|
||||
}
|
||||
|
||||
static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress";
|
||||
|
||||
bool llvm::hasMustProgress(const Loop *L) {
|
||||
|
|
|
@ -14,6 +14,5 @@ add_llvm_component_library(LLVMFrontendOpenMP
|
|||
LINK_COMPONENTS
|
||||
Core
|
||||
Support
|
||||
Passes
|
||||
TransformUtils
|
||||
)
|
||||
|
|
|
@ -16,23 +16,15 @@
|
|||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Analysis/CodeMetrics.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
#include "llvm/IR/DebugInfo.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/MDBuilder.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/Passes/PassBuilder.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/CodeExtractor.h"
|
||||
#include "llvm/Transforms/Utils/LoopPeel.h"
|
||||
#include "llvm/Transforms/Utils/UnrollLoop.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
|
@ -47,12 +39,6 @@ static cl::opt<bool>
|
|||
"'as-if' properties of runtime calls."),
|
||||
cl::init(false));
|
||||
|
||||
static cl::opt<double> UnrollThresholdFactor(
|
||||
"openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
|
||||
cl::desc("Factor for the unroll threshold to account for code "
|
||||
"simplifications still taking place"),
|
||||
cl::init(1.5));
|
||||
|
||||
void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
|
||||
LLVMContext &Ctx = Fn.getContext();
|
||||
|
||||
|
@ -2070,281 +2056,6 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
|
|||
return Result;
|
||||
}
|
||||
|
||||
/// Attach loop metadata \p Properties to the loop described by \p Loop. If the
|
||||
/// loop already has metadata, the loop properties are appended.
|
||||
static void addLoopMetadata(CanonicalLoopInfo *Loop,
|
||||
ArrayRef<Metadata *> Properties) {
|
||||
assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
|
||||
|
||||
// Nothing to do if no property to attach.
|
||||
if (Properties.empty())
|
||||
return;
|
||||
|
||||
LLVMContext &Ctx = Loop->getFunction()->getContext();
|
||||
SmallVector<Metadata *> NewLoopProperties;
|
||||
NewLoopProperties.push_back(nullptr);
|
||||
|
||||
// If the loop already has metadata, prepend it to the new metadata.
|
||||
BasicBlock *Latch = Loop->getLatch();
|
||||
assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
|
||||
MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
|
||||
if (Existing)
|
||||
append_range(NewLoopProperties, drop_begin(Existing->operands(), 1));
|
||||
|
||||
append_range(NewLoopProperties, Properties);
|
||||
MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties);
|
||||
LoopID->replaceOperandWith(0, LoopID);
|
||||
|
||||
Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
|
||||
}
|
||||
|
||||
void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
|
||||
LLVMContext &Ctx = Builder.getContext();
|
||||
addLoopMetadata(
|
||||
Loop, {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
|
||||
MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))});
|
||||
}
|
||||
|
||||
void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) {
|
||||
LLVMContext &Ctx = Builder.getContext();
|
||||
addLoopMetadata(
|
||||
Loop, {
|
||||
MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
|
||||
});
|
||||
}
|
||||
|
||||
/// Create the TargetMachine object to query the backend for optimization
|
||||
/// preferences.
|
||||
///
|
||||
/// Ideally, this would be passed from the front-end to the OpenMPBuilder, but
|
||||
/// e.g. Clang does not pass it to its CodeGen layer and creates it only when
|
||||
/// needed for the LLVM pass pipeline. We use some default options to avoid
|
||||
/// having to pass too many settings from the frontend that probably do not
|
||||
/// matter.
|
||||
///
|
||||
/// Currently, TargetMachine is only used sometimes by the unrollLoopPartial
|
||||
/// method. If we are going to use TargetMachine for more purposes, especially
|
||||
/// those that are sensitive to TargetOptions, RelocModel and CodeModel, it
|
||||
/// might become worth requiring front-ends to pass on their TargetMachine,
|
||||
/// or at least cache it between methods. Note that while frontends such as Clang
|
||||
/// have just a single main TargetMachine per translation unit, "target-cpu" and
|
||||
/// "target-features" that determine the TargetMachine are per-function and can
|
||||
/// be overridden using __attribute__((target("OPTIONS"))).
|
||||
static std::unique_ptr<TargetMachine>
|
||||
createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) {
|
||||
Module *M = F->getParent();
|
||||
|
||||
StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
|
||||
StringRef Features = F->getFnAttribute("target-features").getValueAsString();
|
||||
const std::string &Triple = M->getTargetTriple();
|
||||
|
||||
std::string Error;
|
||||
const llvm::Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
|
||||
if (!TheTarget)
|
||||
return {};
|
||||
|
||||
llvm::TargetOptions Options;
|
||||
return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
|
||||
Triple, CPU, Features, Options, /*RelocModel=*/None, /*CodeModel=*/None,
|
||||
OptLevel));
|
||||
}
|
||||
|
||||
/// Heuristically determine the best-performing unroll factor for \p CLI. This
|
||||
/// depends on the target processor. We are re-using the same heuristics as the
|
||||
/// LoopUnrollPass.
|
||||
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
|
||||
Function *F = CLI->getFunction();
|
||||
|
||||
// Assume the user requests the most aggressive unrolling, even if the rest of
|
||||
// the code is optimized using a lower setting.
|
||||
CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
|
||||
std::unique_ptr<TargetMachine> TM = createTargetMachine(F, OptLevel);
|
||||
|
||||
llvm::PassBuilder PB;
|
||||
FunctionAnalysisManager FAM;
|
||||
PB.registerFunctionAnalyses(FAM);
|
||||
TargetIRAnalysis TIRA;
|
||||
if (TM)
|
||||
TIRA = TargetIRAnalysis(
|
||||
[&](const Function &F) { return TM->getTargetTransformInfo(F); });
|
||||
TargetIRAnalysis::Result &&TTI = TIRA.run(*F, FAM);
|
||||
ScalarEvolutionAnalysis SEA;
|
||||
ScalarEvolution &&SE = SEA.run(*F, FAM);
|
||||
DominatorTreeAnalysis DTA;
|
||||
DominatorTree &&DT = DTA.run(*F, FAM);
|
||||
LoopAnalysis LIA;
|
||||
LoopInfo &&LI = LIA.run(*F, FAM);
|
||||
AssumptionAnalysis ACT;
|
||||
AssumptionCache &&AC = ACT.run(*F, FAM);
|
||||
OptimizationRemarkEmitter ORE{F};
|
||||
|
||||
Loop *L = LI.getLoopFor(CLI->getHeader());
|
||||
assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
|
||||
|
||||
TargetTransformInfo::UnrollingPreferences UP =
|
||||
gatherUnrollingPreferences(L, SE, TTI,
|
||||
/*BlockFrequencyInfo=*/nullptr,
|
||||
/*ProfileSummaryInfo=*/nullptr, ORE, OptLevel,
|
||||
/*UserThreshold=*/None,
|
||||
/*UserCount=*/None,
|
||||
/*UserAllowPartial=*/true,
|
||||
/*UserAllowRuntime=*/true,
|
||||
/*UserUpperBound=*/None,
|
||||
/*UserFullUnrollMaxCount=*/None);
|
||||
|
||||
UP.Force = true;
|
||||
|
||||
// Account for additional optimizations taking place before the LoopUnrollPass
|
||||
// would unroll the loop.
|
||||
UP.Threshold *= UnrollThresholdFactor;
|
||||
UP.PartialThreshold *= UnrollThresholdFactor;
|
||||
|
||||
// Use normal unroll factors even if the rest of the code is optimized for
|
||||
// size.
|
||||
UP.OptSizeThreshold = UP.Threshold;
|
||||
UP.PartialOptSizeThreshold = UP.PartialThreshold;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Unroll heuristic thresholds:\n"
|
||||
<< " Threshold=" << UP.Threshold << "\n"
|
||||
<< " PartialThreshold=" << UP.PartialThreshold << "\n"
|
||||
<< " OptSizeThreshold=" << UP.OptSizeThreshold << "\n"
|
||||
<< " PartialOptSizeThreshold="
|
||||
<< UP.PartialOptSizeThreshold << "\n");
|
||||
|
||||
// Disable peeling.
|
||||
TargetTransformInfo::PeelingPreferences PP =
|
||||
gatherPeelingPreferences(L, SE, TTI,
|
||||
/*UserAllowPeeling=*/false,
|
||||
/*UserAllowProfileBasedPeeling=*/false,
|
||||
/*UserUnrollingSpecficValues=*/false);
|
||||
|
||||
SmallPtrSet<const Value *, 32> EphValues;
|
||||
CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
|
||||
|
||||
// Assume that reads and writes to stack variables can be eliminated by
|
||||
// Mem2Reg, SROA or LICM. That is, don't count them towards the loop body's
|
||||
// size.
|
||||
for (BasicBlock *BB : L->blocks()) {
|
||||
for (Instruction &I : *BB) {
|
||||
Value *Ptr;
|
||||
if (auto *Load = dyn_cast<LoadInst>(&I)) {
|
||||
Ptr = Load->getPointerOperand();
|
||||
} else if (auto *Store = dyn_cast<StoreInst>(&I)) {
|
||||
Ptr = Store->getPointerOperand();
|
||||
} else
|
||||
continue;
|
||||
|
||||
Ptr = Ptr->stripPointerCasts();
|
||||
|
||||
if (auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
|
||||
if (Alloca->getParent() == &F->getEntryBlock())
|
||||
EphValues.insert(&I);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned NumInlineCandidates;
|
||||
bool NotDuplicatable;
|
||||
bool Convergent;
|
||||
unsigned LoopSize =
|
||||
ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
|
||||
TTI, EphValues, UP.BEInsns);
|
||||
LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
|
||||
|
||||
// Loop is not unrollable if the loop contains certain instructions.
|
||||
if (NotDuplicatable || Convergent) {
|
||||
LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
|
||||
// be able to use it.
|
||||
int TripCount = 0;
|
||||
int MaxTripCount = 0;
|
||||
bool MaxOrZero = false;
|
||||
unsigned TripMultiple = 0;
|
||||
|
||||
bool UseUpperBound = false;
|
||||
computeUnrollCount(L, TTI, DT, &LI, SE, EphValues, &ORE, TripCount,
|
||||
MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
|
||||
UseUpperBound);
|
||||
unsigned Factor = UP.Count;
|
||||
LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
|
||||
|
||||
// This function returns 1 to signal that the loop should not be unrolled.
|
||||
if (Factor == 0)
|
||||
return 1;
|
||||
return Factor;
|
||||
}
|
||||
|
||||
void OpenMPIRBuilder::unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop,
|
||||
int32_t Factor,
|
||||
CanonicalLoopInfo **UnrolledCLI) {
|
||||
assert(Factor >= 0 && "Unroll factor must not be negative");
|
||||
|
||||
Function *F = Loop->getFunction();
|
||||
LLVMContext &Ctx = F->getContext();
|
||||
|
||||
// If the unrolled loop is not used for another loop-associated directive, it
|
||||
// is sufficient to add metadata for the LoopUnrollPass.
|
||||
if (!UnrolledCLI) {
|
||||
SmallVector<Metadata *, 2> LoopMetadata;
|
||||
LoopMetadata.push_back(
|
||||
MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")));
|
||||
|
||||
if (Factor >= 1) {
|
||||
ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
|
||||
ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
|
||||
LoopMetadata.push_back(MDNode::get(
|
||||
Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
|
||||
}
|
||||
|
||||
addLoopMetadata(Loop, LoopMetadata);
|
||||
return;
|
||||
}
|
||||
|
||||
// Heuristically determine the unroll factor.
|
||||
if (Factor == 0)
|
||||
Factor = computeHeuristicUnrollFactor(Loop);
|
||||
|
||||
// No change required with unroll factor 1.
|
||||
if (Factor == 1) {
|
||||
*UnrolledCLI = Loop;
|
||||
return;
|
||||
}
|
||||
|
||||
assert(Factor >= 2 &&
|
||||
"unrolling only makes sense with a factor of 2 or larger");
|
||||
|
||||
Type *IndVarTy = Loop->getIndVarType();
|
||||
|
||||
// Apply partial unrolling by tiling the loop by the unroll-factor, then fully
|
||||
// unroll the inner loop.
|
||||
Value *FactorVal =
|
||||
ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor,
|
||||
/*isSigned=*/false));
|
||||
std::vector<CanonicalLoopInfo *> LoopNest =
|
||||
tileLoops(DL, {Loop}, {FactorVal});
|
||||
assert(LoopNest.size() == 2 && "Expect 2 loops after tiling");
|
||||
*UnrolledCLI = LoopNest[0];
|
||||
CanonicalLoopInfo *InnerLoop = LoopNest[1];
|
||||
|
||||
// LoopUnrollPass can only fully unroll loops with constant trip count.
|
||||
// Unroll by the unroll factor with a fallback epilog for the remainder
|
||||
// iterations if necessary.
|
||||
ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
|
||||
ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
|
||||
addLoopMetadata(
|
||||
InnerLoop,
|
||||
{MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
|
||||
MDNode::get(
|
||||
Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
|
||||
|
||||
#ifndef NDEBUG
|
||||
(*UnrolledCLI)->assertOK();
|
||||
#endif
|
||||
}
|
||||
|
||||
OpenMPIRBuilder::InsertPointTy
|
||||
OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
|
||||
llvm::Value *BufSize, llvm::Value *CpyBuf,
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/Passes/PassBuilder.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
|
@ -143,40 +142,6 @@ protected:
|
|||
M.reset();
|
||||
}
|
||||
|
||||
/// Create a function with a simple loop that calls printf using the logical
|
||||
/// loop counter for use with tests that need a CanonicalLoopInfo object.
|
||||
CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL,
|
||||
OpenMPIRBuilder &OMPBuilder,
|
||||
Instruction **Call = nullptr,
|
||||
BasicBlock **BodyCode = nullptr) {
|
||||
OMPBuilder.initialize();
|
||||
F->setName("func");
|
||||
|
||||
IRBuilder<> Builder(BB);
|
||||
OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
|
||||
Value *TripCount = F->getArg(0);
|
||||
|
||||
auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP,
|
||||
llvm::Value *LC) {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
if (BodyCode)
|
||||
*BodyCode = Builder.GetInsertBlock();
|
||||
|
||||
// Add something that consumes the induction variable to the body.
|
||||
CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC});
|
||||
if (Call)
|
||||
*Call = CallInst;
|
||||
};
|
||||
CanonicalLoopInfo *Loop =
|
||||
OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount);
|
||||
|
||||
// Finalize the function.
|
||||
Builder.restoreIP(Loop->getAfterIP());
|
||||
Builder.CreateRetVoid();
|
||||
|
||||
return Loop;
|
||||
}
|
||||
|
||||
LLVMContext Ctx;
|
||||
std::unique_ptr<Module> M;
|
||||
Function *F;
|
||||
|
@ -1323,11 +1288,30 @@ TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
|
|||
}
|
||||
|
||||
TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
|
||||
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
|
||||
OpenMPIRBuilder OMPBuilder(*M);
|
||||
Instruction *Call;
|
||||
BasicBlock *BodyCode;
|
||||
OMPBuilder.initialize();
|
||||
F->setName("func");
|
||||
|
||||
IRBuilder<> Builder(BB);
|
||||
OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
|
||||
Value *TripCount = F->getArg(0);
|
||||
|
||||
BasicBlock *BodyCode = nullptr;
|
||||
Instruction *Call = nullptr;
|
||||
auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
BodyCode = Builder.GetInsertBlock();
|
||||
|
||||
// Add something that consumes the induction variable to the body.
|
||||
Call = createPrintfCall(Builder, "%d\\n", {LC});
|
||||
};
|
||||
CanonicalLoopInfo *Loop =
|
||||
buildSingleLoopFunction(DL, OMPBuilder, &Call, &BodyCode);
|
||||
OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount);
|
||||
|
||||
// Finalize the function.
|
||||
Builder.restoreIP(Loop->getAfterIP());
|
||||
Builder.CreateRetVoid();
|
||||
|
||||
Instruction *OrigIndVar = Loop->getIndVar();
|
||||
EXPECT_EQ(Call->getOperand(1), OrigIndVar);
|
||||
|
@ -1664,86 +1648,6 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
|
|||
EXPECT_FALSE(verifyModule(*M, &errs()));
|
||||
}
|
||||
|
||||
// Expects that after unrollLoopFull the module still verifies and that the
// only top-level loop reports both the "llvm.loop.unroll.enable" and the
// "llvm.loop.unroll.full" loop attributes.
TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) {
  OpenMPIRBuilder OMPBuilder(*M);

  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder);

  // Unroll the loop.
  OMPBuilder.unrollLoopFull(DL, CLI);

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // Recompute loop information for the function to inspect the result.
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  // Must be fatal: front() below is undefined on an empty vector, so the test
  // has to stop here instead of continuing after a failed size check.
  ASSERT_EQ(TopLvl.size(), 1u);

  Loop *L = TopLvl.front();
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full"));
}
|
||||
|
||||
// Expects that unrollLoopPartial with factor 5 hands back a valid
// CanonicalLoopInfo that coincides with the single top-level loop, and that
// this loop contains exactly one sub-loop carrying the
// "llvm.loop.unroll.enable" attribute and an "llvm.loop.unroll.count" of 5.
TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) {
  OpenMPIRBuilder OMPBuilder(*M);
  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder);

  // Unroll the loop.
  CanonicalLoopInfo *UnrolledLoop = nullptr;
  OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop);
  ASSERT_NE(UnrolledLoop, nullptr);

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));
  UnrolledLoop->assertOK();

  // Recompute loop information for the function to inspect the result.
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  // Must be fatal: front() below is undefined on an empty vector.
  ASSERT_EQ(TopLvl.size(), 1u);
  Loop *Outer = TopLvl.front();

  // The returned CanonicalLoopInfo has to describe the outer loop.
  EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader());
  EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch());
  EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond());
  EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit());

  // Must be fatal for the same reason: the sub-loop list is dereferenced next.
  ASSERT_EQ(Outer->getSubLoops().size(), 1u);
  Loop *Inner = Outer->getSubLoops().front();

  EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable"));
  EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5);
}
|
||||
|
||||
// Expects that after unrollLoopHeuristic the module still verifies and that
// the only top-level loop reports the "llvm.loop.unroll.enable" attribute.
TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) {
  OpenMPIRBuilder OMPBuilder(*M);

  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder);

  // Unroll the loop.
  OMPBuilder.unrollLoopHeuristic(DL, CLI);

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // Recompute loop information for the function to inspect the result.
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  // Must be fatal: front() below is undefined on an empty vector, so the test
  // has to stop here instead of continuing after a failed size check.
  ASSERT_EQ(TopLvl.size(), 1u);

  Loop *L = TopLvl.front();
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
}
|
||||
|
||||
TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
|
||||
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
|
||||
OpenMPIRBuilder OMPBuilder(*M);
|
||||
|
|
Loading…
Reference in New Issue