[OPENMP50]Codegen for nontemporal clause.

Summary:
Basic codegen for the declarations marked as nontemporal. Also, if the
base declaration in the member expression is marked as nontemporal,
lvalue for member decl access inherits nonteporal flag from the base
lvalue.

Reviewers: rjmccall, hfinkel, jdoerfert

Subscribers: guansong, arphaman, caomhin, kkwli0, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D71708
This commit is contained in:
Alexey Bataev 2019-12-19 10:01:10 -05:00
parent e40ac74dac
commit 0860db966a
20 changed files with 340 additions and 116 deletions

View File

@ -6275,6 +6275,15 @@ class OMPNontemporalClause final
OMPC_nontemporal, SourceLocation(), SourceLocation(),
SourceLocation(), N) {}
/// Get the list of privatied copies if the member expression was captured by
/// one of the privatization clauses.
MutableArrayRef<Expr *> getPrivateRefs() {
return MutableArrayRef<Expr *>(varlist_end(), varlist_size());
}
ArrayRef<const Expr *> getPrivateRefs() const {
return llvm::makeArrayRef(varlist_end(), varlist_size());
}
public:
/// Creates clause with a list of variables \a VL.
///
@ -6293,6 +6302,10 @@ public:
/// \param N The number of variables.
static OMPNontemporalClause *CreateEmpty(const ASTContext &C, unsigned N);
/// Sets the list of references to private copies created in private clauses.
/// \param VL List of references.
void setPrivateRefs(ArrayRef<Expr *> VL);
child_range children() {
return child_range(reinterpret_cast<Stmt **>(varlist_begin()),
reinterpret_cast<Stmt **>(varlist_end()));
@ -6303,6 +6316,16 @@ public:
return const_child_range(Children.begin(), Children.end());
}
child_range private_refs() {
return child_range(reinterpret_cast<Stmt **>(getPrivateRefs().begin()),
reinterpret_cast<Stmt **>(getPrivateRefs().end()));
}
const_child_range private_refs() const {
auto Children = const_cast<OMPNontemporalClause *>(this)->private_refs();
return const_child_range(Children.begin(), Children.end());
}
child_range used_children() {
return child_range(child_iterator(), child_iterator());
}

View File

@ -3378,6 +3378,9 @@ template <typename Derived>
bool RecursiveASTVisitor<Derived>::VisitOMPNontemporalClause(
OMPNontemporalClause *C) {
TRY_TO(VisitOMPClauseList(C));
for (auto *E : C->private_refs()) {
TRY_TO(TraverseStmt(E));
}
return true;
}

View File

@ -1162,8 +1162,8 @@ OMPNontemporalClause *OMPNontemporalClause::Create(const ASTContext &C,
SourceLocation LParenLoc,
SourceLocation EndLoc,
ArrayRef<Expr *> VL) {
// Allocate space for nontemporal variables.
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size()));
// Allocate space for nontemporal variables + private references.
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(2 * VL.size()));
auto *Clause =
new (Mem) OMPNontemporalClause(StartLoc, LParenLoc, EndLoc, VL.size());
Clause->setVarRefs(VL);
@ -1172,10 +1172,16 @@ OMPNontemporalClause *OMPNontemporalClause::Create(const ASTContext &C,
OMPNontemporalClause *OMPNontemporalClause::CreateEmpty(const ASTContext &C,
unsigned N) {
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(N));
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(2 * N));
return new (Mem) OMPNontemporalClause(N);
}
void OMPNontemporalClause::setPrivateRefs(ArrayRef<Expr *> VL) {
assert(VL.size() == varlist_size() && "Number of private references is not "
"the same as the preallocated buffer");
std::copy(VL.begin(), VL.end(), varlist_end());
}
//===----------------------------------------------------------------------===//
// OpenMP clauses printing methods
//===----------------------------------------------------------------------===//

View File

@ -769,10 +769,13 @@ void OMPClauseProfiler::VisitOMPIsDevicePtrClause(
const OMPIsDevicePtrClause *C) {
VisitOMPClauseList(C);
}
void OMPClauseProfiler::VisitOMPNontemporalClause(const OMPNontemporalClause *C) {
void OMPClauseProfiler::VisitOMPNontemporalClause(
const OMPNontemporalClause *C) {
VisitOMPClauseList(C);
for (auto *E : C->private_refs())
Profiler->VisitStmt(E);
}
}
} // namespace
void
StmtProfiler::VisitOMPExecutableDirective(const OMPExecutableDirective *S) {

View File

@ -2566,21 +2566,35 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
VD = VD->getCanonicalDecl();
if (auto *FD = LambdaCaptureFields.lookup(VD))
return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue);
else if (CapturedStmtInfo) {
if (CapturedStmtInfo) {
auto I = LocalDeclMap.find(VD);
if (I != LocalDeclMap.end()) {
LValue CapLVal;
if (VD->getType()->isReferenceType())
return EmitLoadOfReferenceLValue(I->second, VD->getType(),
AlignmentSource::Decl);
return MakeAddrLValue(I->second, T);
CapLVal = EmitLoadOfReferenceLValue(I->second, VD->getType(),
AlignmentSource::Decl);
else
CapLVal = MakeAddrLValue(I->second, T);
// Mark lvalue as nontemporal if the variable is marked as nontemporal
// in simd context.
if (getLangOpts().OpenMP &&
CGM.getOpenMPRuntime().isNontemporalDecl(VD))
CapLVal.setNontemporal(/*Value=*/true);
return CapLVal;
}
LValue CapLVal =
EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD),
CapturedStmtInfo->getContextValue());
return MakeAddrLValue(
CapLVal = MakeAddrLValue(
Address(CapLVal.getPointer(*this), getContext().getDeclAlign(VD)),
CapLVal.getType(), LValueBaseInfo(AlignmentSource::Decl),
CapLVal.getTBAAInfo());
// Mark lvalue as nontemporal if the variable is marked as nontemporal
// in simd context.
if (getLangOpts().OpenMP &&
CGM.getOpenMPRuntime().isNontemporalDecl(VD))
CapLVal.setNontemporal(/*Value=*/true);
return CapLVal;
}
assert(isa<BlockDecl>(CurCodeDecl));
@ -3929,6 +3943,15 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
if (auto *Field = dyn_cast<FieldDecl>(ND)) {
LValue LV = EmitLValueForField(BaseLV, Field);
setObjCGCLValueClass(getContext(), E, LV);
if (getLangOpts().OpenMP) {
// If the member was explicitly marked as nontemporal, mark it as
// nontemporal. If the base lvalue is marked as nontemporal, mark access
// to children as nontemporal too.
if ((IsWrappedCXXThis(BaseExpr) &&
CGM.getOpenMPRuntime().isNontemporalDecl(Field)) ||
BaseLV.isNontemporal())
LV.setNontemporal(/*Value=*/true);
}
return LV;
}

View File

@ -17,6 +17,7 @@
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
@ -11341,6 +11342,46 @@ bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
return true;
}
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
CodeGenModule &CGM, const OMPLoopDirective &S)
: CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
if (!NeedToPush)
return;
NontemporalDeclsSet &DS =
CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
for (const Stmt *Ref : C->private_refs()) {
const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
const ValueDecl *VD;
if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
VD = DRE->getDecl();
} else {
const auto *ME = cast<MemberExpr>(SimpleRefExpr);
assert((ME->isImplicitCXXThis() ||
isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
"Expected member of current class.");
VD = ME->getMemberDecl();
}
DS.insert(VD);
}
}
}
CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
if (!NeedToPush)
return;
CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
return llvm::any_of(
CGM.getOpenMPRuntime().NontemporalDeclsStack,
[VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {

View File

@ -211,6 +211,16 @@ public:
~DisableAutoDeclareTargetRAII();
};
/// Manages list of nontemporal decls for the specified directive.
class NontemporalDeclsRAII {
CodeGenModule &CGM;
const bool NeedToPush;
public:
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S);
~NontemporalDeclsRAII();
};
protected:
CodeGenModule &CGM;
StringRef FirstSeparator, Separator;
@ -650,6 +660,11 @@ private:
std::pair<GlobalDecl, GlobalDecl>>
DeferredVariantFunction;
using NontemporalDeclsSet = llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>>;
/// Stack for list of declarations in current context marked as nontemporal.
/// The set is the union of all current stack elements.
llvm::SmallVector<NontemporalDeclsSet, 4> NontemporalDeclsStack;
/// Flag for keeping track of weather a requires unified_shared_memory
/// directive is present.
bool HasRequiresUnifiedSharedMemory = false;
@ -1663,6 +1678,10 @@ public:
/// Emits the definition of the declare variant function.
virtual bool emitDeclareVariant(GlobalDecl GD, bool IsForDefinition);
/// Checks if the \p VD variable is marked as nontemporal declaration in
/// current context.
bool isNontemporalDecl(const ValueDecl *VD) const;
};
/// Class supports emissionof SIMD-only code.

View File

@ -1803,8 +1803,9 @@ static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
const RegionCodeGenTy &SimdInitGen,
const RegionCodeGenTy &BodyCodeGen) {
auto &&ThenGen = [&SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
PrePostActionTy &) {
auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
PrePostActionTy &) {
CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
SimdInitGen(CGF);

View File

@ -2230,6 +2230,11 @@ void Sema::EndOpenMPClause() {
static void checkAllocateClauses(Sema &S, DSAStackTy *Stack,
ArrayRef<OMPClause *> Clauses);
static std::pair<ValueDecl *, bool>
getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc,
SourceRange &ERange, bool AllowArraySection = false);
static DeclRefExpr *buildCapture(Sema &S, ValueDecl *D, Expr *CaptureExpr,
bool WithInit);
void Sema::EndOpenMPDSABlock(Stmt *CurDirective) {
// OpenMP [2.14.3.5, Restrictions, C/C++, p.1]
@ -2274,6 +2279,31 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) {
}
}
Clause->setPrivateCopies(PrivateCopies);
continue;
}
// Finalize nontemporal clause by handling private copies, if any.
if (auto *Clause = dyn_cast<OMPNontemporalClause>(C)) {
SmallVector<Expr *, 8> PrivateRefs;
for (Expr *RefExpr : Clause->varlists()) {
assert(RefExpr && "NULL expr in OpenMP nontemporal clause.");
SourceLocation ELoc;
SourceRange ERange;
Expr *SimpleRefExpr = RefExpr;
auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange);
if (Res.second)
// It will be analyzed later.
PrivateRefs.push_back(RefExpr);
ValueDecl *D = Res.first;
if (!D)
continue;
const DSAStackTy::DSAVarData DVar =
DSAStack->getTopDSA(D, /*FromParent=*/false);
PrivateRefs.push_back(DVar.PrivateCopy ? DVar.PrivateCopy
: SimpleRefExpr);
}
Clause->setPrivateRefs(PrivateRefs);
continue;
}
}
// Check allocate clauses.
@ -4262,9 +4292,10 @@ static bool checkIfClauses(Sema &S, OpenMPDirectiveKind Kind,
return ErrorFound;
}
static std::pair<ValueDecl *, bool>
getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc,
SourceRange &ERange, bool AllowArraySection = false) {
static std::pair<ValueDecl *, bool> getPrivateItem(Sema &S, Expr *&RefExpr,
SourceLocation &ELoc,
SourceRange &ERange,
bool AllowArraySection) {
if (RefExpr->isTypeDependent() || RefExpr->isValueDependent() ||
RefExpr->containsUnexpandedParameterPack())
return std::make_pair(nullptr, true);
@ -17172,8 +17203,6 @@ OMPClause *Sema::ActOnOpenMPNontemporalClause(ArrayRef<Expr *> VarList,
if (!D)
continue;
auto *VD = dyn_cast<VarDecl>(D);
// OpenMP 5.0, 2.9.3.1 simd Construct, Restrictions.
// A list-item cannot appear in more than one nontemporal clause.
if (const Expr *PrevRef =
@ -17185,12 +17214,7 @@ OMPClause *Sema::ActOnOpenMPNontemporalClause(ArrayRef<Expr *> VarList,
continue;
}
DeclRefExpr *Ref = nullptr;
if (!VD && isOpenMPCapturedDecl(D) && !CurContext->isDependentContext())
Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true);
Vars.push_back((VD || !Ref || CurContext->isDependentContext())
? RefExpr->IgnoreParens()
: Ref);
Vars.push_back(RefExpr);
}
if (Vars.empty())

View File

@ -12466,4 +12466,9 @@ void OMPClauseReader::VisitOMPNontemporalClause(OMPNontemporalClause *C) {
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivateRefs(Vars);
}

View File

@ -6544,4 +6544,6 @@ void OMPClauseWriter::VisitOMPNontemporalClause(OMPNontemporalClause *C) {
Record.AddSourceLocation(C->getLParenLoc());
for (auto *VE : C->varlists())
Record.AddStmt(VE);
for (auto *E : C->private_refs())
Record.AddStmt(E);
}

View File

@ -143,7 +143,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
#pragma omp target
#pragma omp teams
#ifdef OMP5
#pragma omp distribute simd dist_schedule(static) safelen(32) if(simd: true)
#pragma omp distribute simd dist_schedule(static) safelen(32) if(simd: true) nontemporal(a, b)
#else
#pragma omp distribute simd dist_schedule(static) safelen(32)
#endif // OMP5
@ -189,6 +189,11 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
// CHECK: [[BBINNBODY]]:
// CHECK: {{.+}} = load i32, i32* [[IV]]
// ... loop body ...
// OMP45-NOT: !nontemporal
// OMP50: load float*,{{.*}}!nontemporal
// OMP50: load float*,{{.*}}!nontemporal
// OMP50-NOT: !nontemporal
// CHECK: br label %[[BBBODYCONT:.+]]
// CHECK: [[BBBODYCONT]]:
// CHECK: br label %[[BBINNINC:.+]]
@ -271,7 +276,7 @@ void test_precond() {
#pragma omp target
#pragma omp teams
#ifdef OMP5
#pragma omp distribute simd linear(i) if(a)
#pragma omp distribute simd linear(i) if(a) nontemporal(i)
#else
#pragma omp distribute simd linear(i)
#endif // OMP5
@ -293,6 +298,9 @@ void test_precond() {
// CHECK: br i1 [[ACMP]], label %[[PRECOND_THEN:.+]], label %[[PRECOND_END:.+]]
// CHECK: [[PRECOND_THEN]]
// CHECK: call void @__kmpc_for_static_init_4
// OMP45-NOT: !nontemporal
// OMP50: store i8 {{.*}}!nontemporal
// OMP50-NOT: !nontemporal
// CHECK: call void @__kmpc_for_static_fini
// CHECK: [[PRECOND_END]]

View File

@ -333,7 +333,7 @@ void simple(float *a, float *b, float *c, float *d) {
// OMP50: br i1 [[COND]], label {{%?}}[[THEN:[^,]+]], label {{%?}}[[ELSE:[^,]+]]
// OMP50: [[THEN]]:
#ifdef OMP5
#pragma omp for simd reduction(*:R) if (simd:A)
#pragma omp for simd reduction(*:R) if (simd:A) nontemporal(R)
#else
#pragma omp for simd reduction(*:R)
#endif
@ -366,7 +366,8 @@ void simple(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],
// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]
// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],
// OMP45: store i32 %{{.+}}, i32* [[R_PRIV]],
// OMP50: store i32 %{{.+}}, i32* [[R_PRIV]],{{.*}}!nontemporal
R *= i;
// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]
// CHECK-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1

View File

@ -22,10 +22,15 @@
long long get_val() { return 0; }
double *g_ptr;
struct S {
int a, b;
};
// CHECK-LABEL: define {{.*void}} @{{.*}}simple{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void simple(float *a, float *b, float *c, float *d) {
S s, *p;
#ifdef OMP5
#pragma omp simd if (simd: true)
#pragma omp simd if (simd: true) nontemporal(a, b, c, d, s)
#else
#pragma omp simd
#endif
@ -43,8 +48,17 @@ void simple(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]{{.*}}!llvm.access.group
// ... loop body ...
// End of body: store into a[i]:
// OMP45-NOT: load float*,{{.*}}!nontemporal
// CHECK-NOT: load float,{{.*}}!nontemporal
// OMP50: load float*,{{.*}}!nontemporal
// OMP50: load float*,{{.*}}!nontemporal
// OMP50: load float*,{{.*}}!nontemporal
// OMP50: load i32,{{.*}}!nontemporal
// OMP50-NOT: load i32,{{.*}}!nontemporal
// OMP50: load float*,{{.*}}!nontemporal
// CHECK-NOT: load float,{{.*}}!nontemporal
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}{{.*}}!llvm.access.group
a[i] = b[i] * c[i] * d[i];
a[i] = b[i] * c[i] * d[i] + s.a + p->a;
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]{{.*}}!llvm.access.group
@ -718,6 +732,47 @@ void linear(float *a) {
//
}
#ifdef OMP5
// OMP50-LABEL: inner_simd
void inner_simd() {
double a, b;
#pragma omp simd nontemporal(a)
for (int i = 0; i < 10; ++i) {
#pragma omp simd nontemporal(b)
for (int k = 0; k < 10; ++k) {
// OMP50: load double,{{.*}}!nontemporal
// OMP50: store double{{.*}}!nontemporal
a = b;
}
// OMP50-NOT: load double,{{.*}}!nontemporal
// OMP50: load double,
// OMP50: store double{{.*}}!nontemporal
a = b;
}
}
extern struct T t;
struct Base {
float a;
};
struct T : public Base {
void foo() {
#pragma omp simd nontemporal(Base::a)
for (int i = 0; i < 10; ++i) {
// OMP50: store float{{.*}}!nontemporal
// OMP50-NOT: nontemporal
// OMP50-NEXT: store float
Base::a = 0;
t.a = 0;
}
}
} t;
void bartfoo() {
t.foo();
}
#endif // OMP5
// TERM_DEBUG-LABEL: bar
int bar() {return 0;};

View File

@ -539,7 +539,7 @@ struct S1 {
short int c[2][n];
#ifdef OMP5
#pragma omp target parallel for simd if(n>60)
#pragma omp target parallel for simd if(n>60) nontemporal(a)
#else
#pragma omp target parallel for simd if(target: n>60)
#endif // OMP5
@ -837,6 +837,9 @@ int bar(int n){
// OMP45: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}})
// OMP50: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}}, i[[SZ]] %{{.+}})
// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
// OMP45-NOT: !nontemporal
// OMP50: store double{{.*}}!nontemporal
// OMP50: load double{{.*}}!nontemporal
// CHECK: define internal void [[HVT6]]

View File

@ -85,8 +85,8 @@
// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 800, i64 800, i64 547]
// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i64] [i64 4, i64 2, i64 1, i64 40]
// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 800, i64 800, i64 800, i64 547]
// OMP45-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [6 x i64] [i64 32, i64 281474976711171, i64 800, i64 800, i64 800, i64 547]
// OMP50-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [7 x i64] [i64 32, i64 281474976711171, i64 800, i64 800, i64 800, i64 547, i64 800]
// OMP45-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 800, i64 800, i64 800, i64 547]
// OMP50-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [6 x i64] [i64 547, i64 800, i64 800, i64 800, i64 547, i64 800]
// CHECK-DAG: @{{.*}} = weak constant i8 0
// CHECK-DAG: @{{.*}} = weak constant i8 0
// CHECK-DAG: @{{.*}} = weak constant i8 0
@ -461,7 +461,11 @@ struct S1 {
int b = n+1;
short int c[2][n];
#pragma omp target simd if(n>60)
#ifdef OMP5
#pragma omp target simd if(n>60) nontemporal(a) private(a)
#else
#pragma omp target simd if(n>60) private(a)
#endif // OMP5
for (unsigned long long it = 2000; it >= 600; it -= 400) {
this->a = (double)b + 1.5;
c[1][1] = ++a;
@ -519,96 +523,84 @@ int bar(int n){
// CHECK-32: [[CSZSIZE:%.+]] = mul nuw i32 [[CELEMSIZE2]], 2
// CHECK-32: [[CSIZE:%.+]] = sext i32 [[CSZSIZE]] to i64
// OMP45-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 1)
// OMP50-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 7, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 1)
// OMP45-DAG: [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0
// OMP45-DAG: [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0
// OMP45-DAG: [[SR]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S:%.+]], i32 0, i32 0
// OMP45-DAG: [[SADDR0:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX0:[0-9]+]]
// OMP45-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX0]]
// OMP45-DAG: [[PADDR0:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX0]]
// OMP45-DAG: [[SADDR1:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX1:[0-9]+]]
// OMP45-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX1]]
// OMP45-DAG: [[PADDR1:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX1]]
// OMP45-DAG: [[SADDR2:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX2:[0-9]+]]
// OMP45-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX2]]
// OMP45-DAG: [[PADDR2:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX2]]
// OMP45-DAG: [[SADDR3:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX3:[0-9]+]]
// OMP45-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX3]]
// OMP45-DAG: [[PADDR3:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX3]]
// OMP45-DAG: [[SADDR4:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX4:[0-9]+]]
// OMP45-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX4]]
// OMP45-DAG: [[PADDR4:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX4]]
// OMP45-DAG: [[SADDR5:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX5:[0-9]+]]
// OMP45-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX5]]
// OMP45-DAG: [[PADDR5:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX5]]
// OMP50-DAG: [[BPR]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP:%.+]], i32 0, i32 0
// OMP50-DAG: [[PR]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P:%.+]], i32 0, i32 0
// OMP50-DAG: [[SR]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S:%.+]], i32 0, i32 0
// OMP50-DAG: [[SADDR0:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX0:[0-9]+]]
// OMP50-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX0]]
// OMP50-DAG: [[PADDR0:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX0]]
// OMP50-DAG: [[SADDR1:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX1:[0-9]+]]
// OMP50-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX1]]
// OMP50-DAG: [[PADDR1:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX1]]
// OMP50-DAG: [[SADDR2:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX2:[0-9]+]]
// OMP50-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX2]]
// OMP50-DAG: [[PADDR2:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX2]]
// OMP50-DAG: [[SADDR3:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX3:[0-9]+]]
// OMP50-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX3]]
// OMP50-DAG: [[PADDR3:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX3]]
// OMP50-DAG: [[SADDR4:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX4:[0-9]+]]
// OMP50-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX4]]
// OMP50-DAG: [[PADDR4:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX4]]
// OMP50-DAG: [[SADDR5:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX5:[0-9]+]]
// OMP50-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX5]]
// OMP50-DAG: [[PADDR5:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX5]]
// OMP50-DAG: [[SADDR6:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX6:[0-9]+]]
// OMP50-DAG: [[BPADDR6:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX6]]
// OMP50-DAG: [[PADDR6:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX6]]
// OMP45-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 1)
// OMP50-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 1)
// OMP45-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
// OMP45-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
// OMP45-DAG: [[SR]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S:%.+]], i32 0, i32 0
// OMP45-DAG: [[SADDR0:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX0:[0-9]+]]
// OMP45-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]]
// OMP45-DAG: [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]]
// OMP45-DAG: [[SADDR1:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX1:[0-9]+]]
// OMP45-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]]
// OMP45-DAG: [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]]
// OMP45-DAG: [[SADDR2:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX2:[0-9]+]]
// OMP45-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]]
// OMP45-DAG: [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]]
// OMP45-DAG: [[SADDR3:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX3:[0-9]+]]
// OMP45-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]]
// OMP45-DAG: [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]]
// OMP45-DAG: [[SADDR4:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX4:[0-9]+]]
// OMP45-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX4]]
// OMP45-DAG: [[PADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX4]]
// OMP50-DAG: [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0
// OMP50-DAG: [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0
// OMP50-DAG: [[SR]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S:%.+]], i32 0, i32 0
// OMP50-DAG: [[SADDR0:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX0:[0-9]+]]
// OMP50-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX0]]
// OMP50-DAG: [[PADDR0:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX0]]
// OMP50-DAG: [[SADDR1:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX1:[0-9]+]]
// OMP50-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX1]]
// OMP50-DAG: [[PADDR1:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX1]]
// OMP50-DAG: [[SADDR2:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX2:[0-9]+]]
// OMP50-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX2]]
// OMP50-DAG: [[PADDR2:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX2]]
// OMP50-DAG: [[SADDR3:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX3:[0-9]+]]
// OMP50-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX3]]
// OMP50-DAG: [[PADDR3:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX3]]
// OMP50-DAG: [[SADDR4:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX4:[0-9]+]]
// OMP50-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX4]]
// OMP50-DAG: [[PADDR4:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX4]]
// OMP50-DAG: [[SADDR5:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX5:[0-9]+]]
// OMP50-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX5]]
// OMP50-DAG: [[PADDR5:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX5]]
// The names below are not necessarily consistent with the names used for the
// addresses above as some are repeated.
// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR0:%.+]],
// CHECK-DAG: store double* %{{.+}}, double** [[CPADDR0:%.+]],
// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR0:%.+]],
// CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to [[S1]]**
// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to double**
// CHECK-DAG: store i64 %{{.+}}, i64* {{%[^,]+}}
// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR1:%.+]],
// CHECK-DAG: store double* %{{.+}}, double** [[CPADDR1:%.+]],
// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to [[S1]]**
// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to double**
// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to [[S1]]**
// CHECK-DAG: store i64 {{4|8}}, i64* {{%[^,]+}}
// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR2:%.+]],
// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR2:%.+]],
// CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR1:%.+]],
// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR1:%.+]],
// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: store i64 4, i64* {{%[^,]+}}
// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CBPADDR3:%.+]],
// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CPADDR3:%.+]],
// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CBPADDR2:%.+]],
// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CPADDR2:%.+]],
// CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: store i64 {{4|8}}, i64* {{%[^,]+}}
// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR3:%.+]],
// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR3:%.+]],
// CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: store i64 {{4|8}}, i64* {{%[^,]+}}
// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR4:%.+]],
// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR4:%.+]],
// CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: store i64 {{4|8}}, i64* {{%[^,]+}}
// CHECK-DAG: store i16* %{{.+}}, i16** [[CBPADDR5:%.+]],
// CHECK-DAG: store i16* %{{.+}}, i16** [[CPADDR5:%.+]],
// CHECK-DAG: [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to i16**
// CHECK-DAG: [[CPADDR5]] = bitcast i8** {{%[^,]+}} to i16**
// CHECK-DAG: store i16* %{{.+}}, i16** [[CBPADDR4:%.+]],
// CHECK-DAG: store i16* %{{.+}}, i16** [[CPADDR4:%.+]],
// CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
// CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
// CHECK-DAG: store i64 [[CSIZE]], i64* {{%[^,]+}}
// OMP50-DAG: store i[[SZ]] [[SIMD_COND]], i[[SZ]]* [[CBPADDR6:%.+]],
// OMP50-DAG: store i[[SZ]] [[SIMD_COND]], i[[SZ]]* [[CPADDR6:%.+]],
// OMP50-DAG: [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// OMP50-DAG: [[CPADDR6]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// OMP50-DAG: store i[[SZ]] [[SIMD_COND]], i[[SZ]]* [[CBPADDR5:%.+]],
// OMP50-DAG: store i[[SZ]] [[SIMD_COND]], i[[SZ]]* [[CPADDR5:%.+]],
// OMP50-DAG: [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// OMP50-DAG: [[CPADDR5]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// OMP50-DAG: store i64 1, i64* {{%[^,]+}}
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
@ -738,6 +730,10 @@ int bar(int n){
// OMP50-DAG: [[CONV_COND:%.+]] = bitcast i[[SZ]]* [[LOCAL_SIMD_COND_CASTED]] to i8*
// OMP50-DAG: [[SIMD_COND:%.+]] = load i8, i8* [[CONV_COND]],
// OMP50-DAG: trunc i8 [[SIMD_COND]] to i1
// OMP45-NOT: !nontemporal
// OMP50: store double {{.*}}!nontemporal
// OMP50: load double, {{.*}}!nontemporal
// OMP50: store double {{.*}}!nontemporal
// CHECK: define internal void [[HVT6]]
// Create local storage for each capture.

View File

@ -35,20 +35,25 @@ void gtid_test() {
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
#ifdef OMP5
#pragma omp target teams distribute parallel for simd if(simd: true)
#pragma omp target teams distribute parallel for simd if(simd: true) nontemporal(Arg)
#else
#pragma omp target teams distribute parallel for simd
#endif // OMP5
for(int i = 0 ; i < 100; i++) {}
for (int i = 0; i < 100; i++) {
Arg = 0;
}
// CHECK: define internal void [[OFFLOADING_FUN_0]](
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
// CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void
// OMP50: load i32,{{.*}}!nontemporal
// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 3, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: define{{.+}} void [[OMP_OUTLINED_0]](
// CHECK: call void @__kmpc_for_static_init_4(
// OMP45-NOT: !nontemporal
// OMP50: store i32 0,{{.*}}!nontemporal
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: ret
#pragma omp target teams distribute parallel for simd if (parallel: false)

View File

@ -165,7 +165,7 @@ int foo(int n) {
// CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}})
#ifdef OMP5
#pragma omp target teams distribute simd if(target: 0) safelen(32) linear(a) if(simd: 1)
#pragma omp target teams distribute simd if(target: 0) safelen(32) linear(a) if(simd: 1) nontemporal(a)
#else
#pragma omp target teams distribute simd if(target: 0) safelen(32) linear(a)
#endif // OMP5
@ -395,6 +395,8 @@ int foo(int n) {
// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
// OMP45-NOT: !nontemporal
// OMP50: load i32,{{.*}}!nontemporal
// CHECK-64: store i32 10, i32* [[AA_CADDR]], align
// CHECK-32: store i32 10, i32* [[AA_ADDR]], align
// CHECK: ret void

View File

@ -177,16 +177,16 @@ struct SS{
// CK3: define {{.*}}i32 @{{.+}}foo{{.+}}(
int foo(void) {
// CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
// CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
// CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}})
#pragma omp target
#ifdef OMP5
#pragma omp teams distribute simd if(b)
#pragma omp teams distribute simd if(b) nontemporal(a, b)
#else
#pragma omp teams distribute simd
#endif // OMP5
for(int i = 0; i < X; i++) {
a[i] = (T)0;
a[i] = (T)b;
}
// outlined target region
@ -197,6 +197,8 @@ struct SS{
// CK3: define internal void @[[OUTL1]]({{.+}})
// CK3: call void @__kmpc_for_static_init_4(
// OMP3_45-NOT: !nontemporal
// OMP3_50: load float,{{.*}}!nontemporal
// CK3: call void @__kmpc_for_static_fini(
// CK3: ret void

View File

@ -2458,6 +2458,8 @@ void OMPClauseEnqueue::VisitOMPIsDevicePtrClause(const OMPIsDevicePtrClause *C)
void OMPClauseEnqueue::VisitOMPNontemporalClause(
const OMPNontemporalClause *C) {
VisitOMPClauseList(C);
for (const auto *E : C->private_refs())
Visitor->AddStmt(E);
}
}