[OPENMP50]Codegen for reduction clauses with 'task' modifier.

Summary:
Added codegen for reduction clause with task modifier.
```
  #pragma omp ... reduction(task, +: a)
  {
  #pragma omp ... in_reduction(+: a)
  }
```
is translated into something like this:
```
  #pragma omp ... reduction(+:a)
  {
    struct red_input_t {
      void *reduce_shar;
      void *reduce_orig;
      size_t reduce_size;
      void *reduce_init;
      void *reduce_fini;
      void *reduce_comb;
      unsigned flags;
    } r_var;
    r_var.reduce_shar = &a;
    r_var.reduce_orig = &original a;
    r_var.reduce_size = sizeof(a);
    r_var.reduce_init = [](void* l,void*){return *(int*)l=0;};
    r_var.reduce_fini = nullptr;
    r_var.reduce_comb = [](void* l,void* r){return *(int*)l += *(int)r;};
    void *tg = __kmpc_taskred_modifier_init(<loc_addr>,<gtid>,
      <flag - 0 for parallel, 1 for worksharing>,
      <1 - number of reduction elements>,
      &r_var);
    {
    #pragma omp ... in_reduction(+: a) firstprivate(tg)
    ...
    }
    __kmpc_task_reduction_modifier_fini(<loc_addr>,<gtid>,
      <flag - 0 for parallel, 1 for worksharing>);
  }
```

Reviewers: jdoerfert

Subscribers: yaxunl, guansong, jfb, cfe-commits, caomhin

Tags: #clang

Differential Revision: https://reviews.llvm.org/D79034
This commit is contained in:
Alexey Bataev 2020-04-24 09:56:29 -04:00
parent 07d448135f
commit 8c2f4e0e85
19 changed files with 1975 additions and 120 deletions

View File

@ -356,6 +356,9 @@ public:
///
class OMPParallelDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
/// Special reference expression for handling task reduction. Used to store
/// the taskgroup descriptor returned by the runtime functions.
Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel;
@ -381,6 +384,9 @@ class OMPParallelDirective : public OMPExecutableDirective {
SourceLocation(), NumClauses, 1),
HasCancel(false) {}
/// Sets special task reduction descriptor.
void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@ -392,11 +398,14 @@ public:
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement associated with the directive.
/// \param TaskRedRef Task reduction special reference expression to handle
/// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPParallelDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
bool HasCancel);
/// Creates an empty directive with the place for \a N clauses.
///
@ -406,6 +415,10 @@ public:
static OMPParallelDirective *CreateEmpty(const ASTContext &C,
unsigned NumClauses, EmptyShell);
/// Returns special task reduction reference expression.
Expr *getTaskReductionRefExpr() { return TaskRedRef; }
const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@ -1258,7 +1271,9 @@ public:
///
class OMPForDirective : public OMPLoopDirective {
friend class ASTStmtReader;
/// Special reference expression for handling task reduction. Used to store
/// the taskgroup descriptor returned by the runtime functions.
Expr *TaskRedRef = nullptr;
/// true if current directive has inner cancel directive.
bool HasCancel;
@ -1286,6 +1301,9 @@ class OMPForDirective : public OMPLoopDirective {
NumClauses),
HasCancel(false) {}
/// Sets special task reduction descriptor.
void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@ -1299,13 +1317,15 @@ public:
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
/// \param TaskRedRef Task reduction special reference expression to handle
/// taskgroup descriptor.
/// \param HasCancel true if current directive has inner cancel directive.
///
static OMPForDirective *Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, unsigned CollapsedNum,
ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, const HelperExprs &Exprs,
bool HasCancel);
Expr *TaskRedRef, bool HasCancel);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
@ -1317,6 +1337,10 @@ public:
static OMPForDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
unsigned CollapsedNum, EmptyShell);
/// Returns special task reduction reference expression.
Expr *getTaskReductionRefExpr() { return TaskRedRef; }
const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@ -1403,6 +1427,9 @@ public:
class OMPSectionsDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
/// Special reference expression for handling task reduction. Used to store
/// the taskgroup descriptor returned by the runtime functions.
Expr *TaskRedRef = nullptr;
/// true if current directive has inner cancel directive.
bool HasCancel;
@ -1429,6 +1456,9 @@ class OMPSectionsDirective : public OMPExecutableDirective {
SourceLocation(), NumClauses, 1),
HasCancel(false) {}
/// Sets special task reduction descriptor.
void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@ -1440,11 +1470,14 @@ public:
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param TaskRedRef Task reduction special reference expression to handle
/// taskgroup descriptor.
/// \param HasCancel true if current directive has inner directive.
///
static OMPSectionsDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses
/// clauses.
@ -1455,6 +1488,10 @@ public:
static OMPSectionsDirective *CreateEmpty(const ASTContext &C,
unsigned NumClauses, EmptyShell);
/// Returns special task reduction reference expression.
Expr *getTaskReductionRefExpr() { return TaskRedRef; }
const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@ -1715,6 +1752,9 @@ public:
class OMPParallelForDirective : public OMPLoopDirective {
friend class ASTStmtReader;
/// Special reference expression for handling task reduction. Used to store
/// the taskgroup descriptor returned by the runtime functions.
Expr *TaskRedRef = nullptr;
/// true if current region has inner cancel directive.
bool HasCancel;
@ -1743,6 +1783,9 @@ class OMPParallelForDirective : public OMPLoopDirective {
SourceLocation(), CollapsedNum, NumClauses),
HasCancel(false) {}
/// Sets special task reduction descriptor.
void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@ -1756,12 +1799,15 @@ public:
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
/// \param TaskRedRef Task reduction special reference expression to handle
/// taskgroup descriptor.
/// \param HasCancel true if current directive has inner cancel directive.
///
static OMPParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
bool HasCancel);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
@ -1775,6 +1821,10 @@ public:
unsigned CollapsedNum,
EmptyShell);
/// Returns special task reduction reference expression.
Expr *getTaskReductionRefExpr() { return TaskRedRef; }
const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@ -1863,6 +1913,10 @@ public:
class OMPParallelMasterDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
/// Special reference expression for handling task reduction. Used to store
/// the taskgroup descriptor returned by the runtime functions.
Expr *TaskRedRef = nullptr;
OMPParallelMasterDirective(SourceLocation StartLoc, SourceLocation EndLoc,
unsigned NumClauses)
: OMPExecutableDirective(this, OMPParallelMasterDirectiveClass,
@ -1875,6 +1929,9 @@ class OMPParallelMasterDirective : public OMPExecutableDirective {
SourceLocation(), SourceLocation(), NumClauses,
1) {}
/// Sets special task reduction descriptor.
void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
public:
/// Creates directive with a list of \a Clauses.
///
@ -1883,10 +1940,12 @@ public:
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param TaskRedRef Task reduction special reference expression to handle
/// taskgroup descriptor.
///
static OMPParallelMasterDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt);
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef);
/// Creates an empty directive with the place for \a NumClauses
/// clauses.
@ -1897,6 +1956,10 @@ public:
static OMPParallelMasterDirective *
CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell);
/// Returns special task reduction reference expression.
Expr *getTaskReductionRefExpr() { return TaskRedRef; }
const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
static bool classof(const Stmt *T) {
return T->getStmtClass() == OMPParallelMasterDirectiveClass;
}
@ -1914,6 +1977,9 @@ public:
class OMPParallelSectionsDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
/// Special reference expression for handling task reduction. Used to store
/// the taskgroup descriptor returned by the runtime functions.
Expr *TaskRedRef = nullptr;
/// true if current directive has inner cancel directive.
bool HasCancel;
@ -1941,6 +2007,9 @@ class OMPParallelSectionsDirective : public OMPExecutableDirective {
1),
HasCancel(false) {}
/// Sets special task reduction descriptor.
void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@ -1952,11 +2021,14 @@ public:
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param TaskRedRef Task reduction special reference expression to handle
/// taskgroup descriptor.
/// \param HasCancel true if current directive has inner cancel directive.
///
static OMPParallelSectionsDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses
/// clauses.
@ -1967,6 +2039,10 @@ public:
static OMPParallelSectionsDirective *
CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell);
/// Returns special task reduction reference expression.
Expr *getTaskReductionRefExpr() { return TaskRedRef; }
const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@ -2805,6 +2881,9 @@ public:
///
class OMPTargetParallelDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
/// Special reference expression for handling task reduction. Used to store
/// the taskgroup descriptor returned by the runtime functions.
Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel = false;
@ -2830,6 +2909,8 @@ class OMPTargetParallelDirective : public OMPExecutableDirective {
SourceLocation(), SourceLocation(), NumClauses,
/*NumChildren=*/1) {}
/// Sets special task reduction descriptor.
void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@ -2841,11 +2922,14 @@ public:
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param TaskRedRef Task reduction special reference expression to handle
/// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPTargetParallelDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses
/// clauses.
@ -2856,6 +2940,10 @@ public:
static OMPTargetParallelDirective *
CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell);
/// Returns special task reduction reference expression.
Expr *getTaskReductionRefExpr() { return TaskRedRef; }
const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@ -2876,6 +2964,9 @@ public:
class OMPTargetParallelForDirective : public OMPLoopDirective {
friend class ASTStmtReader;
/// Special reference expression for handling task reduction. Used to store
/// the taskgroup descriptor returned by the runtime functions.
Expr *TaskRedRef = nullptr;
/// true if current region has inner cancel directive.
bool HasCancel;
@ -2905,6 +2996,9 @@ class OMPTargetParallelForDirective : public OMPLoopDirective {
SourceLocation(), CollapsedNum, NumClauses),
HasCancel(false) {}
/// Sets special task reduction descriptor.
void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@ -2918,12 +3012,15 @@ public:
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
/// \param TaskRedRef Task reduction special reference expression to handle
/// taskgroup descriptor.
/// \param HasCancel true if current directive has inner cancel directive.
///
static OMPTargetParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
bool HasCancel);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
@ -2937,6 +3034,10 @@ public:
unsigned CollapsedNum,
EmptyShell);
/// Returns special task reduction reference expression.
Expr *getTaskReductionRefExpr() { return TaskRedRef; }
const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@ -3709,6 +3810,9 @@ public:
///
class OMPDistributeParallelForDirective : public OMPLoopDirective {
friend class ASTStmtReader;
/// Special reference expression for handling task reduction. Used to store
/// the taskgroup descriptor returned by the runtime functions.
Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel = false;
@ -3740,6 +3844,9 @@ class OMPDistributeParallelForDirective : public OMPLoopDirective {
NumClauses),
HasCancel(false) {}
/// Sets special task reduction descriptor.
void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@ -3753,12 +3860,15 @@ public:
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
/// \param TaskRedRef Task reduction special reference expression to handle
/// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPDistributeParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
bool HasCancel);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
@ -3772,6 +3882,10 @@ public:
unsigned CollapsedNum,
EmptyShell);
/// Returns special task reduction reference expression.
Expr *getTaskReductionRefExpr() { return TaskRedRef; }
const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@ -4274,6 +4388,9 @@ public:
///
class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective {
friend class ASTStmtReader;
/// Special reference expression for handling task reduction. Used to store
/// the taskgroup descriptor returned by the runtime functions.
Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel = false;
@ -4306,6 +4423,9 @@ class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective {
NumClauses),
HasCancel(false) {}
/// Sets special task reduction descriptor.
void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@ -4319,12 +4439,15 @@ public:
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
/// \param TaskRedRef Task reduction special reference expression to handle
/// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPTeamsDistributeParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses clauses.
///
@ -4336,6 +4459,10 @@ public:
CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
EmptyShell);
/// Returns special task reduction reference expression.
Expr *getTaskReductionRefExpr() { return TaskRedRef; }
const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@ -4483,6 +4610,9 @@ public:
class OMPTargetTeamsDistributeParallelForDirective final
: public OMPLoopDirective {
friend class ASTStmtReader;
/// Special reference expression for handling task reduction. Used to store
/// the taskgroup descriptor returned by the runtime functions.
Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel = false;
@ -4516,6 +4646,9 @@ class OMPTargetTeamsDistributeParallelForDirective final
SourceLocation(), SourceLocation(), CollapsedNum, NumClauses),
HasCancel(false) {}
/// Sets special task reduction descriptor.
void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@ -4529,12 +4662,15 @@ public:
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
/// \param TaskRedRef Task reduction special reference expression to handle
/// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPTargetTeamsDistributeParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses clauses.
///
@ -4546,6 +4682,10 @@ public:
CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
EmptyShell);
/// Returns special task reduction reference expression.
Expr *getTaskReductionRefExpr() { return TaskRedRef; }
const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }

View File

@ -161,7 +161,8 @@ void OMPLoopDirective::setFinalsConditions(ArrayRef<Expr *> A) {
OMPParallelDirective *OMPParallelDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPParallelDirective), alignof(OMPClause *));
void *Mem =
@ -170,6 +171,7 @@ OMPParallelDirective *OMPParallelDirective::Create(
new (Mem) OMPParallelDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@ -227,11 +229,10 @@ OMPSimdDirective *OMPSimdDirective::CreateEmpty(const ASTContext &C,
return new (Mem) OMPSimdDirective(CollapsedNum, NumClauses);
}
OMPForDirective *
OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, unsigned CollapsedNum,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs, bool HasCancel) {
OMPForDirective *OMPForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
unsigned Size = llvm::alignTo(sizeof(OMPForDirective), alignof(OMPClause *));
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
@ -264,6 +265,7 @@ OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc,
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@ -334,7 +336,8 @@ OMPForSimdDirective *OMPForSimdDirective::CreateEmpty(const ASTContext &C,
OMPSectionsDirective *OMPSectionsDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPSectionsDirective), alignof(OMPClause *));
void *Mem =
@ -343,6 +346,7 @@ OMPSectionsDirective *OMPSectionsDirective::Create(
new (Mem) OMPSectionsDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@ -449,7 +453,7 @@ OMPCriticalDirective *OMPCriticalDirective::CreateEmpty(const ASTContext &C,
OMPParallelForDirective *OMPParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs, bool HasCancel) {
const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPParallelForDirective), alignof(OMPClause *));
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
@ -483,6 +487,7 @@ OMPParallelForDirective *OMPParallelForDirective::Create(
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@ -552,7 +557,7 @@ OMPParallelForSimdDirective::CreateEmpty(const ASTContext &C,
OMPParallelMasterDirective *OMPParallelMasterDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef) {
unsigned Size =
llvm::alignTo(sizeof(OMPParallelMasterDirective), alignof(OMPClause *));
void *Mem =
@ -561,6 +566,7 @@ OMPParallelMasterDirective *OMPParallelMasterDirective::Create(
new (Mem) OMPParallelMasterDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
Dir->setTaskReductionRefExpr(TaskRedRef);
return Dir;
}
@ -576,7 +582,8 @@ OMPParallelMasterDirective *OMPParallelMasterDirective::CreateEmpty(const ASTCon
OMPParallelSectionsDirective *OMPParallelSectionsDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPParallelSectionsDirective), alignof(OMPClause *));
void *Mem =
@ -585,6 +592,7 @@ OMPParallelSectionsDirective *OMPParallelSectionsDirective::Create(
new (Mem) OMPParallelSectionsDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@ -887,7 +895,8 @@ OMPTargetDirective *OMPTargetDirective::CreateEmpty(const ASTContext &C,
OMPTargetParallelDirective *OMPTargetParallelDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPTargetParallelDirective), alignof(OMPClause *));
void *Mem =
@ -896,6 +905,7 @@ OMPTargetParallelDirective *OMPTargetParallelDirective::Create(
new (Mem) OMPTargetParallelDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@ -913,7 +923,7 @@ OMPTargetParallelDirective::CreateEmpty(const ASTContext &C,
OMPTargetParallelForDirective *OMPTargetParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs, bool HasCancel) {
const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelForDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
@ -947,6 +957,7 @@ OMPTargetParallelForDirective *OMPTargetParallelForDirective::Create(
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@ -1457,7 +1468,7 @@ OMPTargetUpdateDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs, bool HasCancel) {
const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
@ -1506,6 +1517,7 @@ OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create(
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->HasCancel = HasCancel;
return Dir;
}
@ -1932,7 +1944,7 @@ OMPTeamsDistributeParallelForDirective *
OMPTeamsDistributeParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs, bool HasCancel) {
const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
auto Size = llvm::alignTo(sizeof(OMPTeamsDistributeParallelForDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
@ -1981,6 +1993,7 @@ OMPTeamsDistributeParallelForDirective::Create(
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->HasCancel = HasCancel;
return Dir;
}
@ -2084,7 +2097,7 @@ OMPTargetTeamsDistributeParallelForDirective *
OMPTargetTeamsDistributeParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs, bool HasCancel) {
const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
auto Size =
llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForDirective),
alignof(OMPClause *));
@ -2135,6 +2148,7 @@ OMPTargetTeamsDistributeParallelForDirective::Create(
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->HasCancel = HasCancel;
return Dir;
}

View File

@ -710,6 +710,12 @@ enum OpenMPRTLFunction {
// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
// *d);
OMPRTL__kmpc_task_reduction_get_th_data,
// Call to void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
// is_ws, int num, void *data);
OMPRTL__kmpc_taskred_modifier_init,
// Call to void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
// int is_ws);
OMPRTL__kmpc_task_reduction_modifier_fini,
// Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
OMPRTL__kmpc_alloc,
// Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
@ -1020,26 +1026,25 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
if (!PrivateType->isVariablyModifiedType()) {
Sizes.emplace_back(
CGF.getTypeSize(
SharedAddresses[N].first.getType().getNonReferenceType()),
CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
nullptr);
return;
}
llvm::Value *Size;
llvm::Value *SizeInChars;
auto *ElemType = cast<llvm::PointerType>(
SharedAddresses[N].first.getPointer(CGF)->getType())
auto *ElemType =
cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
->getElementType();
auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
if (AsArraySection) {
Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
SharedAddresses[N].first.getPointer(CGF));
Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
OrigAddresses[N].first.getPointer(CGF));
Size = CGF.Builder.CreateNUWAdd(
Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
} else {
SizeInChars = CGF.getTypeSize(
SharedAddresses[N].first.getType().getNonReferenceType());
SizeInChars =
CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
}
Sizes.emplace_back(SizeInChars, Size);
@ -2347,6 +2352,28 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
break;
}
case OMPRTL__kmpc_taskred_modifier_init: {
// Build void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
// is_ws, int num_data, void *data);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy,
CGM.IntTy, CGM.VoidPtrTy};
auto *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy,
/*Name=*/"__kmpc_taskred_modifier_init");
break;
}
case OMPRTL__kmpc_task_reduction_modifier_fini: {
// Build void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
// int is_ws);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy};
auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
FnTy,
/*Name=*/"__kmpc_task_reduction_modifier_fini");
break;
}
case OMPRTL__kmpc_alloc: {
// Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
// al); omp_allocator_handle_t type is void *.
@ -6784,7 +6811,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_task_red_input_t .rd_input.[Size];
Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionVars,
ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
Data.ReductionCopies, Data.ReductionOps);
for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
// kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
@ -6848,6 +6875,22 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
FlagsLVal.getType());
}
if (Data.IsReductionWithTaskMod) {
// Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
// is_ws, int num, void *data);
llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
CGM.IntTy, /*isSigned=*/true);
llvm::Value *Args[] = {
IdentTLoc, GTid,
llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
/*isSigned=*/true),
llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
TaskRedInput.getPointer(), CGM.VoidPtrTy)};
return CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_taskred_modifier_init), Args);
}
// Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
llvm::Value *Args[] = {
CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
@ -6859,6 +6902,22 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
Args);
}
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
SourceLocation Loc,
bool IsWorksharingReduction) {
// Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
// is_ws, int num, void *data);
llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
CGM.IntTy, /*isSigned=*/true);
llvm::Value *Args[] = {IdentTLoc, GTid,
llvm::ConstantInt::get(CGM.IntTy,
IsWorksharingReduction ? 1 : 0,
/*isSigned=*/true)};
(void)CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_task_reduction_modifier_fini), Args);
}
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
SourceLocation Loc,
ReductionCodeGen &RCG,
@ -12364,6 +12423,12 @@ llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
SourceLocation Loc,
bool IsWorksharingReduction) {
llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
SourceLocation Loc,
ReductionCodeGen &RCG,

View File

@ -100,6 +100,7 @@ struct OMPTaskDataTy final {
SmallVector<const Expr *, 4> LastprivateVars;
SmallVector<const Expr *, 4> LastprivateCopies;
SmallVector<const Expr *, 4> ReductionVars;
SmallVector<const Expr *, 4> ReductionOrigs;
SmallVector<const Expr *, 4> ReductionCopies;
SmallVector<const Expr *, 4> ReductionOps;
struct DependData {
@ -118,6 +119,8 @@ struct OMPTaskDataTy final {
unsigned NumberOfParts = 0;
bool Tied = true;
bool Nogroup = false;
bool IsReductionWithTaskMod = false;
bool IsWorksharingReduction = false;
};
/// Class intended to support codegen of all kind of the reduction clauses.
@ -1418,18 +1421,34 @@ public:
/// should be emitted for reduction:
/// \code
///
/// _task_red_item_t red_data[n];
/// _taskred_item_t red_data[n];
/// ...
/// red_data[i].shar = &origs[i];
/// red_data[i].shar = &shareds[i];
/// red_data[i].orig = &origs[i];
/// red_data[i].size = sizeof(origs[i]);
/// red_data[i].f_init = (void*)RedInit<i>;
/// red_data[i].f_fini = (void*)RedDest<i>;
/// red_data[i].f_comb = (void*)RedOp<i>;
/// red_data[i].flags = <Flag_i>;
/// ...
/// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
/// void* tg1 = __kmpc_taskred_init(gtid, n, red_data);
/// \endcode
/// For reduction clause with task modifier it emits the next call:
/// \code
///
/// _taskred_item_t red_data[n];
/// ...
/// red_data[i].shar = &shareds[i];
/// red_data[i].orig = &origs[i];
/// red_data[i].size = sizeof(origs[i]);
/// red_data[i].f_init = (void*)RedInit<i>;
/// red_data[i].f_fini = (void*)RedDest<i>;
/// red_data[i].f_comb = (void*)RedOp<i>;
/// red_data[i].flags = <Flag_i>;
/// ...
/// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n,
/// red_data);
/// \endcode
/// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
/// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
/// \param Data Additional data for task generation like tiedness, final
@ -1440,6 +1459,13 @@ public:
ArrayRef<const Expr *> RHSExprs,
const OMPTaskDataTy &Data);
/// Emits the following code for reduction clause with task modifier:
/// \code
/// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing);
/// \endcode
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc,
bool IsWorksharingReduction);
/// Required to resolve existing problems in the runtime. Emits threadprivate
/// variables to store the size of the VLAs/array sections for
/// initializer/combiner/finalizer functions.
@ -2192,18 +2218,34 @@ public:
/// should be emitted for reduction:
/// \code
///
/// _task_red_item_t red_data[n];
/// _taskred_item_t red_data[n];
/// ...
/// red_data[i].shar = &origs[i];
/// red_data[i].shar = &shareds[i];
/// red_data[i].orig = &origs[i];
/// red_data[i].size = sizeof(origs[i]);
/// red_data[i].f_init = (void*)RedInit<i>;
/// red_data[i].f_fini = (void*)RedDest<i>;
/// red_data[i].f_comb = (void*)RedOp<i>;
/// red_data[i].flags = <Flag_i>;
/// ...
/// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
/// void* tg1 = __kmpc_taskred_init(gtid, n, red_data);
/// \endcode
/// For reduction clause with task modifier it emits the next call:
/// \code
///
/// _taskred_item_t red_data[n];
/// ...
/// red_data[i].shar = &shareds[i];
/// red_data[i].orig = &origs[i];
/// red_data[i].size = sizeof(origs[i]);
/// red_data[i].f_init = (void*)RedInit<i>;
/// red_data[i].f_fini = (void*)RedDest<i>;
/// red_data[i].f_comb = (void*)RedOp<i>;
/// red_data[i].flags = <Flag_i>;
/// ...
/// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n,
/// red_data);
/// \endcode
/// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
/// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
/// \param Data Additional data for task generation like tiedness, final
@ -2213,6 +2255,13 @@ public:
ArrayRef<const Expr *> RHSExprs,
const OMPTaskDataTy &Data) override;
/// Emits the following code for reduction clause with task modifier:
/// \code
/// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing);
/// \endcode
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc,
bool IsWorksharingReduction) override;
/// Required to resolve existing problems in the runtime. Emits threadprivate
/// variables to store the size of the VLAs/array sections for
/// initializer/combiner/finalizer functions + emits threadprivate variable to

View File

@ -1169,21 +1169,23 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
SmallVector<const Expr *, 4> ReductionOps;
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
OMPTaskDataTy Data;
SmallVector<const Expr *, 4> TaskLHSs;
SmallVector<const Expr *, 4> TaskRHSs;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
auto IPriv = C->privates().begin();
auto IRed = C->reduction_ops().begin();
auto ILHS = C->lhs_exprs().begin();
auto IRHS = C->rhs_exprs().begin();
for (const Expr *Ref : C->varlists()) {
Shareds.emplace_back(Ref);
Privates.emplace_back(*IPriv);
ReductionOps.emplace_back(*IRed);
LHSs.emplace_back(*ILHS);
RHSs.emplace_back(*IRHS);
std::advance(IPriv, 1);
std::advance(IRed, 1);
std::advance(ILHS, 1);
std::advance(IRHS, 1);
Shareds.append(C->varlist_begin(), C->varlist_end());
Privates.append(C->privates().begin(), C->privates().end());
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
if (C->getModifier() == OMPC_REDUCTION_task) {
Data.ReductionVars.append(C->privates().begin(), C->privates().end());
Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
Data.ReductionOps.append(C->reduction_ops().begin(),
C->reduction_ops().end());
TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
}
ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
@ -1261,6 +1263,117 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
++IPriv;
++Count;
}
if (!Data.ReductionVars.empty()) {
Data.IsReductionWithTaskMod = true;
Data.IsWorksharingReduction =
isOpenMPWorksharingDirective(D.getDirectiveKind());
llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
*this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
const Expr *TaskRedRef = nullptr;
switch (D.getDirectiveKind()) {
case OMPD_parallel:
TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_for:
TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_sections:
TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_parallel_for:
TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_parallel_master:
TaskRedRef =
cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_parallel_sections:
TaskRedRef =
cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_target_parallel:
TaskRedRef =
cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_target_parallel_for:
TaskRedRef =
cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_distribute_parallel_for:
TaskRedRef =
cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_teams_distribute_parallel_for:
TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
.getTaskReductionRefExpr();
break;
case OMPD_target_teams_distribute_parallel_for:
TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
.getTaskReductionRefExpr();
break;
case OMPD_simd:
case OMPD_for_simd:
case OMPD_section:
case OMPD_single:
case OMPD_master:
case OMPD_critical:
case OMPD_parallel_for_simd:
case OMPD_task:
case OMPD_taskyield:
case OMPD_barrier:
case OMPD_taskwait:
case OMPD_taskgroup:
case OMPD_flush:
case OMPD_depobj:
case OMPD_scan:
case OMPD_ordered:
case OMPD_atomic:
case OMPD_teams:
case OMPD_target:
case OMPD_cancellation_point:
case OMPD_cancel:
case OMPD_target_data:
case OMPD_target_enter_data:
case OMPD_target_exit_data:
case OMPD_taskloop:
case OMPD_taskloop_simd:
case OMPD_master_taskloop:
case OMPD_master_taskloop_simd:
case OMPD_parallel_master_taskloop:
case OMPD_parallel_master_taskloop_simd:
case OMPD_distribute:
case OMPD_target_update:
case OMPD_distribute_parallel_for_simd:
case OMPD_distribute_simd:
case OMPD_target_parallel_for_simd:
case OMPD_target_simd:
case OMPD_teams_distribute:
case OMPD_teams_distribute_simd:
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_teams:
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_parallel_for_simd:
case OMPD_target_teams_distribute_simd:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_threadprivate:
case OMPD_allocate:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_declare_simd:
case OMPD_requires:
case OMPD_declare_variant:
case OMPD_begin_declare_variant:
case OMPD_end_declare_variant:
case OMPD_unknown:
llvm_unreachable("Enexpected directive with task reductions.");
}
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
EmitVarDecl(*VD);
EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
/*Volatile=*/false, TaskRedRef->getType());
}
}
void CodeGenFunction::EmitOMPReductionClauseFinal(
@ -1272,14 +1385,22 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
llvm::SmallVector<const Expr *, 8> RHSExprs;
llvm::SmallVector<const Expr *, 8> ReductionOps;
bool HasAtLeastOneReduction = false;
bool IsReductionWithTaskMod = false;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
HasAtLeastOneReduction = true;
Privates.append(C->privates().begin(), C->privates().end());
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
IsReductionWithTaskMod =
IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
}
if (HasAtLeastOneReduction) {
if (IsReductionWithTaskMod) {
CGM.getOpenMPRuntime().emitTaskReductionFini(
*this, D.getBeginLoc(),
isOpenMPWorksharingDirective(D.getDirectiveKind()));
}
bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
isOpenMPParallelDirective(D.getDirectiveKind()) ||
ReductionKind == OMPD_simd;
@ -3382,21 +3503,13 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
auto IPriv = C->privates().begin();
auto IRed = C->reduction_ops().begin();
auto ILHS = C->lhs_exprs().begin();
auto IRHS = C->rhs_exprs().begin();
for (const Expr *Ref : C->varlists()) {
Data.ReductionVars.emplace_back(Ref);
Data.ReductionCopies.emplace_back(*IPriv);
Data.ReductionOps.emplace_back(*IRed);
LHSs.emplace_back(*ILHS);
RHSs.emplace_back(*IRHS);
std::advance(IPriv, 1);
std::advance(IRed, 1);
std::advance(ILHS, 1);
std::advance(IRHS, 1);
}
Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
Data.ReductionOps.append(C->reduction_ops().begin(),
C->reduction_ops().end());
LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
*this, S.getBeginLoc(), LHSs, RHSs, Data);
@ -3776,21 +3889,13 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
SmallVector<const Expr *, 4> RHSs;
OMPTaskDataTy Data;
for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
auto IPriv = C->privates().begin();
auto IRed = C->reduction_ops().begin();
auto ILHS = C->lhs_exprs().begin();
auto IRHS = C->rhs_exprs().begin();
for (const Expr *Ref : C->varlists()) {
Data.ReductionVars.emplace_back(Ref);
Data.ReductionCopies.emplace_back(*IPriv);
Data.ReductionOps.emplace_back(*IRed);
LHSs.emplace_back(*ILHS);
RHSs.emplace_back(*IRHS);
std::advance(IPriv, 1);
std::advance(IRed, 1);
std::advance(ILHS, 1);
std::advance(IRHS, 1);
}
Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
Data.ReductionOps.append(C->reduction_ops().begin(),
C->reduction_ops().end());
LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
llvm::Value *ReductionDesc =
CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),

View File

@ -519,11 +519,15 @@ public:
getTopMostTaskgroupReductionData(const ValueDecl *D, SourceRange &SR,
const Expr *&ReductionRef,
Expr *&TaskgroupDescriptor) const;
/// Return reduction reference expression for the current taskgroup.
/// Return reduction reference expression for the current taskgroup or
/// parallel/worksharing directives with task reductions.
Expr *getTaskgroupReductionRef() const {
assert(getTopOfStack().Directive == OMPD_taskgroup &&
"taskgroup reference expression requested for non taskgroup "
"directive.");
assert((getTopOfStack().Directive == OMPD_taskgroup ||
((isOpenMPParallelDirective(getTopOfStack().Directive) ||
isOpenMPWorksharingDirective(getTopOfStack().Directive)) &&
!isOpenMPSimdDirective(getTopOfStack().Directive))) &&
"taskgroup reference expression requested for non taskgroup or "
"parallel/worksharing directive.");
return getTopOfStack().TaskgroupReductionRef;
}
/// Checks if the given \p VD declaration is actually a taskgroup reduction
@ -1351,7 +1355,10 @@ void DSAStackTy::addTaskgroupReductionData(const ValueDecl *D, SourceRange SR,
"Additional reduction info may be specified only for reduction items.");
ReductionData &ReductionData = getTopOfStack().ReductionMap[D];
assert(ReductionData.ReductionRange.isInvalid() &&
getTopOfStack().Directive == OMPD_taskgroup &&
(getTopOfStack().Directive == OMPD_taskgroup ||
((isOpenMPParallelDirective(getTopOfStack().Directive) ||
isOpenMPWorksharingDirective(getTopOfStack().Directive)) &&
!isOpenMPSimdDirective(getTopOfStack().Directive))) &&
"Additional reduction info may be specified only once for reduction "
"items.");
ReductionData.set(BOK, SR);
@ -1374,7 +1381,10 @@ void DSAStackTy::addTaskgroupReductionData(const ValueDecl *D, SourceRange SR,
"Additional reduction info may be specified only for reduction items.");
ReductionData &ReductionData = getTopOfStack().ReductionMap[D];
assert(ReductionData.ReductionRange.isInvalid() &&
getTopOfStack().Directive == OMPD_taskgroup &&
(getTopOfStack().Directive == OMPD_taskgroup ||
((isOpenMPParallelDirective(getTopOfStack().Directive) ||
isOpenMPWorksharingDirective(getTopOfStack().Directive)) &&
!isOpenMPSimdDirective(getTopOfStack().Directive))) &&
"Additional reduction info may be specified only once for reduction "
"items.");
ReductionData.set(ReductionRef, SR);
@ -1395,7 +1405,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
for (const_iterator I = begin() + 1, E = end(); I != E; ++I) {
const DSAInfo &Data = I->SharingMap.lookup(D);
if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup)
if (Data.Attributes != OMPC_reduction ||
Data.Modifier != OMPC_REDUCTION_task)
continue;
const ReductionData &ReductionData = I->ReductionMap.lookup(D);
if (!ReductionData.ReductionOp ||
@ -1407,8 +1418,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
"expression for the descriptor is not "
"set.");
TaskgroupDescriptor = I->TaskgroupReductionRef;
return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(),
Data.PrivateCopy, I->DefaultAttrLoc, /*Modifier=*/0);
return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(),
Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task);
}
return DSAVarData();
}
@ -1420,7 +1431,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
for (const_iterator I = begin() + 1, E = end(); I != E; ++I) {
const DSAInfo &Data = I->SharingMap.lookup(D);
if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup)
if (Data.Attributes != OMPC_reduction ||
Data.Modifier != OMPC_REDUCTION_task)
continue;
const ReductionData &ReductionData = I->ReductionMap.lookup(D);
if (!ReductionData.ReductionOp ||
@ -1432,8 +1444,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
"expression for the descriptor is not "
"set.");
TaskgroupDescriptor = I->TaskgroupReductionRef;
return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(),
Data.PrivateCopy, I->DefaultAttrLoc, /*Modifier=*/0);
return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(),
Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task);
}
return DSAVarData();
}
@ -2229,7 +2241,12 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level,
// Consider taskgroup reduction descriptor variable a private
// to avoid possible capture in the region.
(DSAStack->hasExplicitDirective(
[](OpenMPDirectiveKind K) { return K == OMPD_taskgroup; },
[](OpenMPDirectiveKind K) {
return K == OMPD_taskgroup ||
((isOpenMPParallelDirective(K) ||
isOpenMPWorksharingDirective(K)) &&
!isOpenMPSimdDirective(K));
},
Level) &&
DSAStack->isTaskgroupReductionRef(D, Level)))
? OMPC_private
@ -4193,7 +4210,8 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
SmallVector<const OMPClauseWithPreInit *, 4> PICs;
// This is required for proper codegen.
for (OMPClause *Clause : Clauses) {
if (isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) &&
if (!LangOpts.OpenMPSimd &&
isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) &&
Clause->getClauseKind() == OMPC_in_reduction) {
// Capture taskgroup task_reduction descriptors inside the tasking regions
// with the corresponding in_reduction items.
@ -6137,6 +6155,7 @@ StmtResult Sema::ActOnOpenMPParallelDirective(ArrayRef<OMPClause *> Clauses,
setFunctionHasBranchProtectedScope();
return OMPParallelDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
DSAStack->getTaskgroupReductionRef(),
DSAStack->isCancelRegion());
}
@ -8620,8 +8639,9 @@ Sema::ActOnOpenMPForDirective(ArrayRef<OMPClause *> Clauses, Stmt *AStmt,
}
setFunctionHasBranchProtectedScope();
return OMPForDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount,
Clauses, AStmt, B, DSAStack->isCancelRegion());
return OMPForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPForSimdDirective(
@ -8698,6 +8718,7 @@ StmtResult Sema::ActOnOpenMPSectionsDirective(ArrayRef<OMPClause *> Clauses,
setFunctionHasBranchProtectedScope();
return OMPSectionsDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
DSAStack->getTaskgroupReductionRef(),
DSAStack->isCancelRegion());
}
@ -8858,9 +8879,9 @@ StmtResult Sema::ActOnOpenMPParallelForDirective(
}
setFunctionHasBranchProtectedScope();
return OMPParallelForDirective::Create(Context, StartLoc, EndLoc,
NestedLoopCount, Clauses, AStmt, B,
DSAStack->isCancelRegion());
return OMPParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPParallelForSimdDirective(
@ -8924,8 +8945,9 @@ Sema::ActOnOpenMPParallelMasterDirective(ArrayRef<OMPClause *> Clauses,
setFunctionHasBranchProtectedScope();
return OMPParallelMasterDirective::Create(Context, StartLoc, EndLoc, Clauses,
AStmt);
return OMPParallelMasterDirective::Create(
Context, StartLoc, EndLoc, Clauses, AStmt,
DSAStack->getTaskgroupReductionRef());
}
StmtResult
@ -8964,7 +8986,8 @@ Sema::ActOnOpenMPParallelSectionsDirective(ArrayRef<OMPClause *> Clauses,
setFunctionHasBranchProtectedScope();
return OMPParallelSectionsDirective::Create(
Context, StartLoc, EndLoc, Clauses, AStmt, DSAStack->isCancelRegion());
Context, StartLoc, EndLoc, Clauses, AStmt,
DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
/// detach and mergeable clauses are mutially exclusive, check for it.
@ -10016,8 +10039,9 @@ Sema::ActOnOpenMPTargetParallelDirective(ArrayRef<OMPClause *> Clauses,
setFunctionHasBranchProtectedScope();
return OMPTargetParallelDirective::Create(Context, StartLoc, EndLoc, Clauses,
AStmt, DSAStack->isCancelRegion());
return OMPTargetParallelDirective::Create(
Context, StartLoc, EndLoc, Clauses, AStmt,
DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPTargetParallelForDirective(
@ -10069,9 +10093,9 @@ StmtResult Sema::ActOnOpenMPTargetParallelForDirective(
}
setFunctionHasBranchProtectedScope();
return OMPTargetParallelForDirective::Create(Context, StartLoc, EndLoc,
NestedLoopCount, Clauses, AStmt,
B, DSAStack->isCancelRegion());
return OMPTargetParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
/// Check for existence of a map clause in the list of clauses.
@ -10683,7 +10707,7 @@ StmtResult Sema::ActOnOpenMPDistributeParallelForDirective(
setFunctionHasBranchProtectedScope();
return OMPDistributeParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
DSAStack->isCancelRegion());
DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPDistributeParallelForSimdDirective(
@ -11124,7 +11148,7 @@ StmtResult Sema::ActOnOpenMPTeamsDistributeParallelForDirective(
return OMPTeamsDistributeParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
DSAStack->isCancelRegion());
DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPTargetTeamsDirective(ArrayRef<OMPClause *> Clauses,
@ -11253,7 +11277,7 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForDirective(
setFunctionHasBranchProtectedScope();
return OMPTargetTeamsDistributeParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
DSAStack->isCancelRegion());
DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective(
@ -15103,9 +15127,17 @@ static bool actOnOMPReductionKindClause(
}
// All reduction items are still marked as reduction (to do not increase
// code base size).
Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref,
RD.RedModifier);
if (CurrDir == OMPD_taskgroup) {
unsigned Modifier = RD.RedModifier;
// Consider task_reductions as reductions with task modifier. Required for
// correct analysis of in_reduction clauses.
if (CurrDir == OMPD_taskgroup && ClauseKind == OMPC_task_reduction)
Modifier = OMPC_REDUCTION_task;
Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, Modifier);
if (Modifier == OMPC_REDUCTION_task &&
(CurrDir == OMPD_taskgroup ||
((isOpenMPParallelDirective(CurrDir) ||
isOpenMPWorksharingDirective(CurrDir)) &&
!isOpenMPSimdDirective(CurrDir)))) {
if (DeclareReductionRef.isUsable())
Stack->addTaskgroupReductionData(D, ReductionIdRange,
DeclareReductionRef.get());

View File

@ -2308,6 +2308,7 @@ void ASTStmtReader::VisitOMPParallelDirective(OMPParallelDirective *D) {
// The NumClauses field was read in ReadStmtFromStream.
Record.skipInts(1);
VisitOMPExecutableDirective(D);
D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@ -2317,6 +2318,7 @@ void ASTStmtReader::VisitOMPSimdDirective(OMPSimdDirective *D) {
void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
VisitOMPLoopDirective(D);
D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@ -2329,6 +2331,7 @@ void ASTStmtReader::VisitOMPSectionsDirective(OMPSectionsDirective *D) {
// The NumClauses field was read in ReadStmtFromStream.
Record.skipInts(1);
VisitOMPExecutableDirective(D);
D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@ -2360,6 +2363,7 @@ void ASTStmtReader::VisitOMPCriticalDirective(OMPCriticalDirective *D) {
void ASTStmtReader::VisitOMPParallelForDirective(OMPParallelForDirective *D) {
VisitOMPLoopDirective(D);
D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@ -2374,6 +2378,7 @@ void ASTStmtReader::VisitOMPParallelMasterDirective(
// The NumClauses field was read in ReadStmtFromStream.
Record.skipInts(1);
VisitOMPExecutableDirective(D);
D->setTaskReductionRefExpr(Record.readSubExpr());
}
void ASTStmtReader::VisitOMPParallelSectionsDirective(
@ -2382,6 +2387,7 @@ void ASTStmtReader::VisitOMPParallelSectionsDirective(
// The NumClauses field was read in ReadStmtFromStream.
Record.skipInts(1);
VisitOMPExecutableDirective(D);
D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@ -2489,12 +2495,14 @@ void ASTStmtReader::VisitOMPTargetParallelDirective(
VisitStmt(D);
Record.skipInts(1);
VisitOMPExecutableDirective(D);
D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readBool());
}
void ASTStmtReader::VisitOMPTargetParallelForDirective(
OMPTargetParallelForDirective *D) {
VisitOMPLoopDirective(D);
D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@ -2564,6 +2572,7 @@ void ASTStmtReader::VisitOMPTargetUpdateDirective(OMPTargetUpdateDirective *D) {
void ASTStmtReader::VisitOMPDistributeParallelForDirective(
OMPDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@ -2604,6 +2613,7 @@ void ASTStmtReader::VisitOMPTeamsDistributeParallelForSimdDirective(
void ASTStmtReader::VisitOMPTeamsDistributeParallelForDirective(
OMPTeamsDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@ -2622,6 +2632,7 @@ void ASTStmtReader::VisitOMPTargetTeamsDistributeDirective(
void ASTStmtReader::VisitOMPTargetTeamsDistributeParallelForDirective(
OMPTargetTeamsDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}

View File

@ -2195,6 +2195,7 @@ void ASTStmtWriter::VisitOMPParallelDirective(OMPParallelDirective *D) {
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_PARALLEL_DIRECTIVE;
}
@ -2206,6 +2207,7 @@ void ASTStmtWriter::VisitOMPSimdDirective(OMPSimdDirective *D) {
void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) {
VisitOMPLoopDirective(D);
Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_FOR_DIRECTIVE;
}
@ -2219,6 +2221,7 @@ void ASTStmtWriter::VisitOMPSectionsDirective(OMPSectionsDirective *D) {
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_SECTIONS_DIRECTIVE;
}
@ -2253,6 +2256,7 @@ void ASTStmtWriter::VisitOMPCriticalDirective(OMPCriticalDirective *D) {
void ASTStmtWriter::VisitOMPParallelForDirective(OMPParallelForDirective *D) {
VisitOMPLoopDirective(D);
Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_PARALLEL_FOR_DIRECTIVE;
}
@ -2268,6 +2272,7 @@ void ASTStmtWriter::VisitOMPParallelMasterDirective(
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
Record.AddStmt(D->getTaskReductionRefExpr());
Code = serialization::STMT_OMP_PARALLEL_MASTER_DIRECTIVE;
}
@ -2276,6 +2281,7 @@ void ASTStmtWriter::VisitOMPParallelSectionsDirective(
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_PARALLEL_SECTIONS_DIRECTIVE;
}
@ -2336,6 +2342,7 @@ void ASTStmtWriter::VisitOMPTargetParallelDirective(
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
Record.AddStmt(D->getTaskReductionRefExpr());
Record.writeBool(D->hasCancel());
Code = serialization::STMT_OMP_TARGET_PARALLEL_DIRECTIVE;
}
@ -2343,6 +2350,7 @@ void ASTStmtWriter::VisitOMPTargetParallelDirective(
void ASTStmtWriter::VisitOMPTargetParallelForDirective(
OMPTargetParallelForDirective *D) {
VisitOMPLoopDirective(D);
Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_TARGET_PARALLEL_FOR_DIRECTIVE;
}
@ -2476,6 +2484,7 @@ void ASTStmtWriter::VisitOMPTargetUpdateDirective(OMPTargetUpdateDirective *D) {
void ASTStmtWriter::VisitOMPDistributeParallelForDirective(
OMPDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
}
@ -2524,6 +2533,7 @@ void ASTStmtWriter::VisitOMPTeamsDistributeParallelForSimdDirective(
void ASTStmtWriter::VisitOMPTeamsDistributeParallelForDirective(
OMPTeamsDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
}
@ -2544,6 +2554,7 @@ void ASTStmtWriter::VisitOMPTargetTeamsDistributeDirective(
void ASTStmtWriter::VisitOMPTargetTeamsDistributeParallelForDirective(
OMPTargetTeamsDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
}

View File

@ -0,0 +1,130 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: @main
int main(int argc, char **argv) {
#pragma omp target teams
#pragma omp distribute parallel for reduction(task, +: argc, argv[0:10][0:argc])
for (long long i = 0; i < 10; ++i) {
#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
;
}
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
// CHECK: alloca i32,
// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
// CHECK: [[TG:%.+]] = alloca i8*,
// Init firstprivate copy of argc
// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
// Init firstprivate copy of argv[0:10][0:argc]
// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
// CHECK: [[INIT]]:
// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
// CHECK: store i8 0, i8* [[EL]],
// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
// CHECK: [[DONE]]:
// Register task reduction.
// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
// CHECK: call i32 @__kmpc_reduce_nowait(
// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: store i32 0, i32* %{{.+}},
// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: store i32 [[ADD]], i32* %{{.+}},
// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: phi i8*
// CHECK: store i8 0, i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: phi i8*
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
#endif

View File

@ -0,0 +1,130 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: @main
int main(int argc, char **argv) {
#pragma omp parallel
#pragma omp for reduction(task, +: argc, argv[0:10][0:argc])
for (long long i = 0; i < 10; ++i) {
#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
;
}
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
// CHECK: alloca i32,
// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
// CHECK: [[TG:%.+]] = alloca i8*,
// Init firstprivate copy of argc
// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
// Init firstprivate copy of argv[0:10][0:argc]
// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
// CHECK: [[INIT]]:
// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
// CHECK: store i8 0, i8* [[EL]],
// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
// CHECK: [[DONE]]:
// Register task reduction.
// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
// CHECK: call i32 @__kmpc_reduce(
// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: store i32 0, i32* %{{.+}},
// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: store i32 [[ADD]], i32* %{{.+}},
// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: phi i8*
// CHECK: store i8 0, i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: phi i8*
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
#endif

View File

@ -0,0 +1,129 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: @main
int main(int argc, char **argv) {
#pragma omp parallel for reduction(task, +: argc, argv[0:10][0:argc])
for (long long i = 0; i < 10; ++i) {
#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
;
}
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
// CHECK: alloca i32,
// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
// CHECK: [[TG:%.+]] = alloca i8*,
// Init firstprivate copy of argc
// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
// Init firstprivate copy of argv[0:10][0:argc]
// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
// CHECK: [[INIT]]:
// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
// CHECK: store i8 0, i8* [[EL]],
// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
// CHECK: [[DONE]]:
// Register task reduction.
// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
// CHECK: call i32 @__kmpc_reduce_nowait(
// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: store i32 0, i32* %{{.+}},
// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: store i32 [[ADD]], i32* %{{.+}},
// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: phi i8*
// CHECK: store i8 0, i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: phi i8*
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
#endif

View File

@ -0,0 +1,128 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: @main
int main(int argc, char **argv) {
#pragma omp parallel master reduction(task, +: argc, argv[0:10][0:argc])
{
#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
;
}
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
// CHECK: [[TG:%.+]] = alloca i8*,
// Init firstprivate copy of argc
// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
// Init firstprivate copy of argv[0:10][0:argc]
// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
// CHECK: [[INIT]]:
// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
// CHECK: store i8 0, i8* [[EL]],
// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
// CHECK: [[DONE]]:
// Register task reduction.
// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
// CHECK: call i32 @__kmpc_reduce_nowait(
// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: store i32 0, i32* %{{.+}},
// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: store i32 [[ADD]], i32* %{{.+}},
// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: phi i8*
// CHECK: store i8 0, i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: phi i8*
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
#endif

View File

@ -0,0 +1,128 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: @main
int main(int argc, char **argv) {
#pragma omp parallel reduction(task, +: argc, argv[0:10][0:argc])
{
#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
;
}
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
// CHECK: [[TG:%.+]] = alloca i8*,
// Init firstprivate copy of argc
// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
// Init firstprivate copy of argv[0:10][0:argc]
// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
// CHECK: [[INIT]]:
// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
// CHECK: store i8 0, i8* [[EL]],
// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
// CHECK: [[DONE]]:
// Register task reduction.
// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
// CHECK: call i32 @__kmpc_reduce_nowait(
// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: store i32 0, i32* %{{.+}},
// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: store i32 [[ADD]], i32* %{{.+}},
// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: phi i8*
// CHECK: store i8 0, i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: phi i8*
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
#endif

View File

@ -0,0 +1,133 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: @main
int main(int argc, char **argv) {
#pragma omp parallel sections reduction(task, +: argc, argv[0:10][0:argc])
{
#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
;
}
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
// CHECK: alloca i32,
// CHECK: alloca i32,
// CHECK: alloca i32,
// CHECK: alloca i32,
// CHECK: alloca i32,
// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
// CHECK: [[TG:%.+]] = alloca i8*,
// Init firstprivate copy of argc
// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
// Init firstprivate copy of argv[0:10][0:argc]
// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
// CHECK: [[INIT]]:
// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
// CHECK: store i8 0, i8* [[EL]],
// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
// CHECK: [[DONE]]:
// Register task reduction.
// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
// CHECK: call i32 @__kmpc_reduce_nowait(
// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: store i32 0, i32* %{{.+}},
// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: store i32 [[ADD]], i32* %{{.+}},
// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: phi i8*
// CHECK: store i8 0, i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: phi i8*
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
#endif

View File

@ -0,0 +1,134 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: @main
int main(int argc, char **argv) {
#pragma omp parallel
#pragma omp sections reduction(task, +: argc, argv[0:10][0:argc])
{
#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
;
}
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
// CHECK: alloca i32,
// CHECK: alloca i32,
// CHECK: alloca i32,
// CHECK: alloca i32,
// CHECK: alloca i32,
// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
// CHECK: [[TG:%.+]] = alloca i8*,
// Init firstprivate copy of argc
// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
// Init firstprivate copy of argv[0:10][0:argc]
// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
// CHECK: [[INIT]]:
// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
// CHECK: store i8 0, i8* [[EL]],
// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
// CHECK: [[DONE]]:
// Register task reduction.
// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
// CHECK: call i32 @__kmpc_reduce(
// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: store i32 0, i32* %{{.+}},
// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: store i32 [[ADD]], i32* %{{.+}},
// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: phi i8*
// CHECK: store i8 0, i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: phi i8*
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
#endif

View File

@ -0,0 +1,129 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: @main
int main(int argc, char **argv) {
#pragma omp target parallel for reduction(task, +: argc, argv[0:10][0:argc])
for (long long i = 0; i < 10; ++i) {
#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
;
}
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
// CHECK: alloca i32,
// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
// CHECK: [[TG:%.+]] = alloca i8*,
// Init firstprivate copy of argc
// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
// Init firstprivate copy of argv[0:10][0:argc]
// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
// CHECK: [[INIT]]:
// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
// CHECK: store i8 0, i8* [[EL]],
// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
// CHECK: [[DONE]]:
// Register task reduction.
// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
// CHECK: call i32 @__kmpc_reduce_nowait(
// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: store i32 0, i32* %{{.+}},
// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: store i32 [[ADD]], i32* %{{.+}},
// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: phi i8*
// CHECK: store i8 0, i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: phi i8*
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
#endif

View File

@ -0,0 +1,128 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: @main
int main(int argc, char **argv) {
#pragma omp target parallel reduction(task, +: argc, argv[0:10][0:argc])
{
#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
;
}
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
// CHECK: [[TG:%.+]] = alloca i8*,
// Init firstprivate copy of argc
// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
// Init firstprivate copy of argv[0:10][0:argc]
// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
// CHECK: [[INIT]]:
// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
// CHECK: store i8 0, i8* [[EL]],
// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
// CHECK: [[DONE]]:
// Register task reduction.
// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
// CHECK: call i32 @__kmpc_reduce_nowait(
// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: store i32 0, i32* %{{.+}},
// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: store i32 [[ADD]], i32* %{{.+}},
// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: phi i8*
// CHECK: store i8 0, i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: phi i8*
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
#endif

View File

@ -0,0 +1,129 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: @main
int main(int argc, char **argv) {
#pragma omp target teams distribute parallel for reduction(task, +: argc, argv[0:10][0:argc])
for (long long i = 0; i < 10; ++i) {
#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
;
}
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
// CHECK: alloca i32,
// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
// CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]],
// CHECK: [[TG:%.+]] = alloca i8*,
// Init firstprivate copy of argc
// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
// Init firstprivate copy of argv[0:10][0:argc]
// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
// CHECK: [[INIT]]:
// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
// CHECK: store i8 0, i8* [[EL]],
// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
// CHECK: [[DONE]]:
// Register task reduction.
// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 0
// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 0
// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 1
// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 2
// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 6
// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 1
// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 0
// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 1
// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 2
// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 6
// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
// CHECK: [[BC:%.+]] = bitcast [2 x [[TASKRED_TY]]]* [[TR]] to i8*
// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
// CHECK: call i32 @__kmpc_reduce_nowait(
// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: store i32 0, i32* %{{.+}},
// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: store i32 [[ADD]], i32* %{{.+}},
// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: phi i8*
// CHECK: store i8 0, i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: phi i8*
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
#endif

View File

@ -0,0 +1,130 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: @main
int main(int argc, char **argv) {
#pragma omp target
#pragma omp teams distribute parallel for reduction(task, +: argc, argv[0:10][0:argc])
for (long long i = 0; i < 10; ++i) {
#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
;
}
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
// CHECK: alloca i32,
// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
// CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]],
// CHECK: [[TG:%.+]] = alloca i8*,
// Init firstprivate copy of argc
// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
// Init firstprivate copy of argv[0:10][0:argc]
// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
// CHECK: [[INIT]]:
// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
// CHECK: store i8 0, i8* [[EL]],
// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
// CHECK: [[DONE]]:
// Register task reduction.
// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 0
// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 0
// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 1
// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 2
// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 6
// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 1
// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 0
// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 1
// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 2
// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 3
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 4
// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 5
// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 6
// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
// CHECK: [[BC:%.+]] = bitcast [2 x [[TASKRED_TY]]]* [[TR]] to i8*
// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
// CHECK: call i32 @__kmpc_reduce_nowait(
// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: store i32 0, i32* %{{.+}},
// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: store i32 [[ADD]], i32* %{{.+}},
// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
// CHECK: phi i8*
// CHECK: store i8 0, i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
// CHECK: phi i8*
// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
// CHECK: getelementptr i8, i8* [[EL]], i32 1
// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
#endif