2014-05-06 18:08:46 +08:00
|
|
|
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This provides a class for OpenMP runtime code generation.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
#include "CGCXXABI.h"
|
|
|
|
#include "CGCleanup.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "CGOpenMPRuntime.h"
|
|
|
|
#include "CodeGenFunction.h"
|
|
|
|
#include "clang/AST/Decl.h"
|
2015-01-14 19:29:14 +08:00
|
|
|
#include "clang/AST/StmtOpenMP.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
2016-01-06 21:42:12 +08:00
|
|
|
#include "llvm/Bitcode/ReaderWriter.h"
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
#include "llvm/IR/CallSite.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
|
|
#include "llvm/IR/GlobalValue.h"
|
|
|
|
#include "llvm/IR/Value.h"
|
2016-01-06 21:42:12 +08:00
|
|
|
#include "llvm/Support/Format.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2014-06-18 15:08:49 +08:00
|
|
|
#include <cassert>
|
2014-05-06 18:08:46 +08:00
|
|
|
|
|
|
|
using namespace clang;
|
|
|
|
using namespace CodeGen;
|
|
|
|
|
2014-10-10 21:57:57 +08:00
|
|
|
namespace {
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief Base class for handling code generation inside OpenMP regions.
|
2014-10-10 20:19:54 +08:00
|
|
|
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
|
|
|
|
public:
|
2015-04-10 12:50:10 +08:00
|
|
|
/// \brief Kinds of OpenMP regions used in codegen.
|
|
|
|
enum CGOpenMPRegionKind {
|
|
|
|
/// \brief Region with outlined function for standalone 'parallel'
|
|
|
|
/// directive.
|
|
|
|
ParallelOutlinedRegion,
|
|
|
|
/// \brief Region with outlined function for standalone 'task' directive.
|
|
|
|
TaskOutlinedRegion,
|
|
|
|
/// \brief Region for constructs that do not require function outlining,
|
|
|
|
/// like 'for', 'sections', 'atomic' etc. directives.
|
|
|
|
InlinedRegion,
|
2015-10-03 00:14:20 +08:00
|
|
|
/// \brief Region with outlined function for standalone 'target' directive.
|
|
|
|
TargetRegion,
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
CGOpenMPRegionInfo(const CapturedStmt &CS,
|
|
|
|
const CGOpenMPRegionKind RegionKind,
|
2015-09-15 20:52:43 +08:00
|
|
|
const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
|
|
|
|
bool HasCancel)
|
2015-04-10 12:50:10 +08:00
|
|
|
: CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
|
2015-09-15 20:52:43 +08:00
|
|
|
CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
|
2014-10-10 20:19:54 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
|
2015-09-15 20:52:43 +08:00
|
|
|
const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
|
|
|
|
bool HasCancel)
|
2015-07-03 17:56:58 +08:00
|
|
|
: CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
|
2015-09-15 20:52:43 +08:00
|
|
|
Kind(Kind), HasCancel(HasCancel) {}
|
2015-02-26 18:27:34 +08:00
|
|
|
|
|
|
|
/// \brief Get a variable or parameter for storing global thread id
|
2014-10-10 20:19:54 +08:00
|
|
|
/// inside OpenMP construct.
|
2015-02-26 18:27:34 +08:00
|
|
|
virtual const VarDecl *getThreadIDVariable() const = 0;
|
2014-10-10 20:19:54 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
/// \brief Emit the captured statement body.
|
2015-09-11 01:07:54 +08:00
|
|
|
void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
|
2015-04-10 12:50:10 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief Get an LValue for the current ThreadID variable.
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
/// \return LValue for thread id variable. This LValue always has type int32*.
|
|
|
|
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
|
2014-10-10 20:19:54 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
|
2015-02-26 18:27:34 +08:00
|
|
|
|
2015-07-03 17:56:58 +08:00
|
|
|
OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
|
|
|
|
|
2015-09-15 20:52:43 +08:00
|
|
|
bool hasCancel() const { return HasCancel; }
|
|
|
|
|
2014-10-10 20:19:54 +08:00
|
|
|
static bool classof(const CGCapturedStmtInfo *Info) {
|
|
|
|
return Info->getKind() == CR_OpenMP;
|
|
|
|
}
|
2015-04-10 12:50:10 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
protected:
|
2015-04-10 12:50:10 +08:00
|
|
|
CGOpenMPRegionKind RegionKind;
|
2016-01-13 04:54:36 +08:00
|
|
|
RegionCodeGenTy CodeGen;
|
2015-07-03 17:56:58 +08:00
|
|
|
OpenMPDirectiveKind Kind;
|
2015-09-15 20:52:43 +08:00
|
|
|
bool HasCancel;
|
2015-02-26 18:27:34 +08:00
|
|
|
};
|
2014-10-10 20:19:54 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief API for captured statement code generation in OpenMP constructs.
|
|
|
|
class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
|
|
|
|
public:
|
2015-04-10 12:50:10 +08:00
|
|
|
CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
|
2015-07-03 17:56:58 +08:00
|
|
|
const RegionCodeGenTy &CodeGen,
|
2015-09-15 20:52:43 +08:00
|
|
|
OpenMPDirectiveKind Kind, bool HasCancel)
|
|
|
|
: CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
|
|
|
|
HasCancel),
|
2015-04-10 12:50:10 +08:00
|
|
|
ThreadIDVar(ThreadIDVar) {
|
2015-02-26 18:27:34 +08:00
|
|
|
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
|
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief Get a variable or parameter for storing global thread id
|
|
|
|
/// inside OpenMP construct.
|
2015-04-11 10:00:23 +08:00
|
|
|
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
|
2015-04-10 12:50:10 +08:00
|
|
|
|
2014-10-10 20:19:54 +08:00
|
|
|
/// \brief Get the name of the capture helper.
|
2014-10-10 21:57:57 +08:00
|
|
|
StringRef getHelperName() const override { return ".omp_outlined."; }
|
2014-10-10 20:19:54 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
static bool classof(const CGCapturedStmtInfo *Info) {
|
|
|
|
return CGOpenMPRegionInfo::classof(Info) &&
|
|
|
|
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
|
|
|
|
ParallelOutlinedRegion;
|
|
|
|
}
|
|
|
|
|
2014-10-10 20:19:54 +08:00
|
|
|
private:
|
|
|
|
/// \brief A variable or parameter storing global thread id for OpenMP
|
|
|
|
/// constructs.
|
|
|
|
const VarDecl *ThreadIDVar;
|
2015-02-26 18:27:34 +08:00
|
|
|
};
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
/// \brief API for captured statement code generation in OpenMP constructs.
|
|
|
|
class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
|
|
|
|
public:
|
2015-04-10 12:50:10 +08:00
|
|
|
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
const VarDecl *ThreadIDVar,
|
2015-07-03 17:56:58 +08:00
|
|
|
const RegionCodeGenTy &CodeGen,
|
2015-09-15 20:52:43 +08:00
|
|
|
OpenMPDirectiveKind Kind, bool HasCancel)
|
|
|
|
: CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
|
2015-04-10 12:50:10 +08:00
|
|
|
ThreadIDVar(ThreadIDVar) {
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
|
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
/// \brief Get a variable or parameter for storing global thread id
|
|
|
|
/// inside OpenMP construct.
|
2015-04-11 10:00:23 +08:00
|
|
|
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
|
|
|
|
/// \brief Get an LValue for the current ThreadID variable.
|
2015-04-11 10:00:23 +08:00
|
|
|
LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
|
|
|
|
/// \brief Get the name of the capture helper.
|
|
|
|
StringRef getHelperName() const override { return ".omp_outlined."; }
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
static bool classof(const CGCapturedStmtInfo *Info) {
|
|
|
|
return CGOpenMPRegionInfo::classof(Info) &&
|
|
|
|
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
|
|
|
|
TaskOutlinedRegion;
|
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
private:
|
|
|
|
/// \brief A variable or parameter storing global thread id for OpenMP
|
|
|
|
/// constructs.
|
|
|
|
const VarDecl *ThreadIDVar;
|
|
|
|
};
|
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief API for inlined captured statement code generation in OpenMP
|
|
|
|
/// constructs.
|
|
|
|
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
|
|
|
|
public:
|
2015-04-10 12:50:10 +08:00
|
|
|
CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
|
2015-07-03 17:56:58 +08:00
|
|
|
const RegionCodeGenTy &CodeGen,
|
2015-09-15 20:52:43 +08:00
|
|
|
OpenMPDirectiveKind Kind, bool HasCancel)
|
|
|
|
: CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
|
|
|
|
OldCSI(OldCSI),
|
2015-02-26 18:27:34 +08:00
|
|
|
OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
// \brief Retrieve the value of the context parameter.
|
2015-04-11 10:00:23 +08:00
|
|
|
llvm::Value *getContextValue() const override {
|
2015-02-26 18:27:34 +08:00
|
|
|
if (OuterRegionInfo)
|
|
|
|
return OuterRegionInfo->getContextValue();
|
|
|
|
llvm_unreachable("No context value for inlined OpenMP region");
|
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-09-11 01:07:54 +08:00
|
|
|
void setContextValue(llvm::Value *V) override {
|
2015-04-10 12:50:10 +08:00
|
|
|
if (OuterRegionInfo) {
|
|
|
|
OuterRegionInfo->setContextValue(V);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
llvm_unreachable("No context value for inlined OpenMP region");
|
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief Lookup the captured field decl for a variable.
|
2015-04-11 10:00:23 +08:00
|
|
|
const FieldDecl *lookup(const VarDecl *VD) const override {
|
2015-02-26 18:27:34 +08:00
|
|
|
if (OuterRegionInfo)
|
|
|
|
return OuterRegionInfo->lookup(VD);
|
2015-04-15 12:52:20 +08:00
|
|
|
// If there is no outer outlined region,no need to lookup in a list of
|
|
|
|
// captured variables, we can use the original one.
|
|
|
|
return nullptr;
|
2015-02-26 18:27:34 +08:00
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-04-11 10:00:23 +08:00
|
|
|
FieldDecl *getThisFieldDecl() const override {
|
2015-02-26 18:27:34 +08:00
|
|
|
if (OuterRegionInfo)
|
|
|
|
return OuterRegionInfo->getThisFieldDecl();
|
|
|
|
return nullptr;
|
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief Get a variable or parameter for storing global thread id
|
|
|
|
/// inside OpenMP construct.
|
2015-04-11 10:00:23 +08:00
|
|
|
const VarDecl *getThreadIDVariable() const override {
|
2015-02-26 18:27:34 +08:00
|
|
|
if (OuterRegionInfo)
|
|
|
|
return OuterRegionInfo->getThreadIDVariable();
|
|
|
|
return nullptr;
|
|
|
|
}
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief Get the name of the capture helper.
|
2015-04-11 10:00:23 +08:00
|
|
|
StringRef getHelperName() const override {
|
2015-04-10 12:50:10 +08:00
|
|
|
if (auto *OuterRegionInfo = getOldCSI())
|
|
|
|
return OuterRegionInfo->getHelperName();
|
2015-02-26 18:27:34 +08:00
|
|
|
llvm_unreachable("No helper name for inlined OpenMP construct");
|
|
|
|
}
|
|
|
|
|
|
|
|
CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
static bool classof(const CGCapturedStmtInfo *Info) {
|
|
|
|
return CGOpenMPRegionInfo::classof(Info) &&
|
|
|
|
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
|
|
|
|
}
|
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
private:
|
|
|
|
/// \brief CodeGen info about outer OpenMP region.
|
|
|
|
CodeGenFunction::CGCapturedStmtInfo *OldCSI;
|
|
|
|
CGOpenMPRegionInfo *OuterRegionInfo;
|
2014-10-10 20:19:54 +08:00
|
|
|
};
|
2015-04-10 12:50:10 +08:00
|
|
|
|
2015-10-03 00:14:20 +08:00
|
|
|
/// \brief API for captured statement code generation in OpenMP target
|
|
|
|
/// constructs. For this captures, implicit parameters are used instead of the
|
2016-01-06 21:42:12 +08:00
|
|
|
/// captured fields. The name of the target region has to be unique in a given
|
|
|
|
/// application so it is provided by the client, because only the client has
|
|
|
|
/// the information to generate that.
|
2015-10-03 00:14:20 +08:00
|
|
|
class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
|
|
|
|
public:
|
|
|
|
CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
|
2016-01-06 21:42:12 +08:00
|
|
|
const RegionCodeGenTy &CodeGen, StringRef HelperName)
|
2015-10-03 00:14:20 +08:00
|
|
|
: CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
|
2016-01-06 21:42:12 +08:00
|
|
|
/*HasCancel=*/false),
|
|
|
|
HelperName(HelperName) {}
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
/// \brief This is unused for target regions because each starts executing
|
|
|
|
/// with a single thread.
|
|
|
|
const VarDecl *getThreadIDVariable() const override { return nullptr; }
|
|
|
|
|
|
|
|
/// \brief Get the name of the capture helper.
|
2016-01-06 21:42:12 +08:00
|
|
|
StringRef getHelperName() const override { return HelperName; }
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
static bool classof(const CGCapturedStmtInfo *Info) {
|
|
|
|
return CGOpenMPRegionInfo::classof(Info) &&
|
|
|
|
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
|
|
|
|
}
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
StringRef HelperName;
|
2015-10-03 00:14:20 +08:00
|
|
|
};
|
|
|
|
|
2016-03-28 20:58:34 +08:00
|
|
|
/// \brief Placeholder region code generator passed by CGOpenMPInnerExprInfo;
/// it must never actually be invoked.
static void EmptyCodeGen(CodeGenFunction & /*CGF*/) {
  llvm_unreachable("No codegen for expressions");
}
|
|
|
|
/// \brief API for generation of expressions captured in a innermost OpenMP
|
|
|
|
/// region.
|
|
|
|
class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo {
|
|
|
|
public:
|
|
|
|
CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
|
|
|
|
: CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
|
|
|
|
OMPD_unknown,
|
|
|
|
/*HasCancel=*/false),
|
|
|
|
PrivScope(CGF) {
|
|
|
|
// Make sure the globals captured in the provided statement are local by
|
|
|
|
// using the privatization logic. We assume the same variable is not
|
|
|
|
// captured more than once.
|
|
|
|
for (auto &C : CS.captures()) {
|
|
|
|
if (!C.capturesVariable() && !C.capturesVariableByCopy())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
const VarDecl *VD = C.getCapturedVar();
|
|
|
|
if (VD->isLocalVarDeclOrParm())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
DeclRefExpr DRE(const_cast<VarDecl *>(VD),
|
|
|
|
/*RefersToEnclosingVariableOrCapture=*/false,
|
|
|
|
VD->getType().getNonReferenceType(), VK_LValue,
|
|
|
|
SourceLocation());
|
|
|
|
PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
|
|
|
|
return CGF.EmitLValue(&DRE).getAddress();
|
|
|
|
});
|
|
|
|
}
|
|
|
|
(void)PrivScope.Privatize();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Lookup the captured field decl for a variable.
|
|
|
|
const FieldDecl *lookup(const VarDecl *VD) const override {
|
|
|
|
if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
|
|
|
|
return FD;
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Emit the captured statement body.
|
|
|
|
void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
|
|
|
|
llvm_unreachable("No body for expressions");
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Get a variable or parameter for storing global thread id
|
|
|
|
/// inside OpenMP construct.
|
|
|
|
const VarDecl *getThreadIDVariable() const override {
|
|
|
|
llvm_unreachable("No thread id for expressions");
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Get the name of the capture helper.
|
|
|
|
StringRef getHelperName() const override {
|
|
|
|
llvm_unreachable("No helper name for expressions");
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool classof(const CGCapturedStmtInfo *Info) { return false; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
/// Private scope to capture global variables.
|
|
|
|
CodeGenFunction::OMPPrivateScope PrivScope;
|
|
|
|
};
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
/// \brief RAII for emitting code of OpenMP constructs.
|
|
|
|
class InlinedOpenMPRegionRAII {
|
|
|
|
CodeGenFunction &CGF;
|
|
|
|
|
|
|
|
public:
|
|
|
|
/// \brief Constructs region for combined constructs.
|
|
|
|
/// \param CodeGen Code generation sequence for combined directives. Includes
|
|
|
|
/// a list of functions used for code generation of implicitly inlined
|
|
|
|
/// regions.
|
2015-07-03 17:56:58 +08:00
|
|
|
InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
|
2015-09-15 20:52:43 +08:00
|
|
|
OpenMPDirectiveKind Kind, bool HasCancel)
|
2015-04-10 12:50:10 +08:00
|
|
|
: CGF(CGF) {
|
|
|
|
// Start emission for the construct.
|
2015-09-15 20:52:43 +08:00
|
|
|
CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
|
|
|
|
CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
|
2015-04-10 12:50:10 +08:00
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
~InlinedOpenMPRegionRAII() {
|
|
|
|
// Restore original CapturedStmtInfo only if we're done with code emission.
|
|
|
|
auto *OldCSI =
|
|
|
|
cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
|
|
|
|
delete CGF.CapturedStmtInfo;
|
|
|
|
CGF.CapturedStmtInfo = OldCSI;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2016-02-19 18:38:26 +08:00
|
|
|
/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x001,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x002,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x010,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x020,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x040,
  /// \brief Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x040,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0x0C0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
};
|
|
|
|
|
|
|
|
/// \brief Indices of the fields of the ident structure that describes a
/// source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief might be used in Fortran
  IdentField_Reserved_1 = 0,
  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// \brief Not really used in Fortran any more
  IdentField_Reserved_2 = 2,
  /// \brief Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3 = 3,
  /// \brief String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
|
|
|
|
|
|
|
|
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// Each 'ordered' enumerator equals its unordered counterpart plus 32.
enum OpenMPSchedType {
  // Default (unordered) versions.

  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,

  // 'ordered' versions.

  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// \brief Default schedule; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,

  /// \brief dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
};
|
|
|
|
|
|
|
|
/// \brief Identifiers for the runtime library entry points used by OpenMP
/// codegen. The comment on each enumerator gives the prototype of the runtime
/// function it stands for. Enumerators are implicitly numbered from 0 in
/// declaration order.
enum OpenMPRTLFunction {
  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// \brief Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// \brief Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// \brief Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// \brief Call to void __kmpc_end_critical(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// \brief Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// \brief Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// \brief Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// \brief Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// \brief Call to void __kmpc_end_serialized_parallel(ident_t *loc,
  /// kmp_int32 global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// \brief Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// \brief Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// \brief Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// \brief Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// \brief Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32
  /// global_tid, int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// \brief Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// \brief Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// \brief Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32
  /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// \brief Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task,
  /// \brief Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32
  /// global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void *,
  /// void *), kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// \brief Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce,
  /// \brief Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// \brief Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// \brief Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// \brief Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// \brief Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32
  /// gtid, kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// \brief Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// \brief Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_ordered,
  /// \brief Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// \brief Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_taskgroup,
  /// \brief Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// \brief Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
  /// global_tid, int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// \brief Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref,
  /// kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps,
  /// kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t
  /// *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// \brief Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// \brief Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc,
  /// kmp_int32 global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// \brief Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// \brief Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// \brief Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_teams,

  //
  // Offloading related calls
  //
  /// \brief Call to int32_t __tgt_target(int32_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  /// int32_t *arg_types);
  OMPRTL__tgt_target,
  /// \brief Call to int32_t __tgt_target_teams(int32_t device_id, void
  /// *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  /// *arg_sizes, int32_t *arg_types, int32_t num_teams, int32_t
  /// thread_limit);
  OMPRTL__tgt_target_teams,
  /// \brief Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// \brief Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
};
|
|
|
|
|
2015-09-11 01:07:54 +08:00
|
|
|
} // anonymous namespace
|
2014-10-10 20:19:54 +08:00
|
|
|
|
|
|
|
/// \brief Get an lvalue for the thread id by loading through the pointer held
/// in the thread id variable.
///
/// The thread id variable is treated as a pointer here: its declared type is
/// cast to PointerType and the local variable is dereferenced once.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
/// \brief Emit the body of the region by invoking the stored CodeGen callback.
///
/// Does nothing when \p CGF has no insert point. The statement argument is
/// unused; the body comes entirely from CodeGen.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  {
    // The nested scope makes the cleanups scope end before the terminate
    // entry is popped.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    CodeGen(CGF);
  }
  CGF.EHStack.popTerminate();
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
/// \brief Get an lvalue for the thread id variable of a task region.
///
/// Unlike CGOpenMPRegionInfo::getThreadIDVariableLValue, no pointer load is
/// emitted: the lvalue refers to the local variable itself, using the
/// variable's own declared type.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
|
|
|
|
|
2014-05-06 18:08:46 +08:00
|
|
|
/// \brief Set up the LLVM types shared by all runtime calls (ident_t, the
/// kmpc_micro function type and kmp_critical_name) and then load the offload
/// info metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OffloadEntriesInfoManager(CGM) {
  // ident_t: four i32 fields followed by the i8* psource string.
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */, nullptr);
  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                               llvm::PointerType::getUnqual(CGM.Int32Ty)};
  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  // kmp_critical_name is modelled as an array of eight i32.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
|
|
|
|
|
2015-03-18 12:13:55 +08:00
|
|
|
/// \brief Drop all entries from InternalVars.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}
|
|
|
|
|
2016-03-04 17:22:22 +08:00
|
|
|
/// \brief Emit the internal helper function for a user-defined reduction:
/// either its combiner or its initializer.
///
/// The generated function returns void and takes two restrict-qualified
/// pointers to \p Ty. Note the parameter order: the pointer bound to \p Out is
/// the first parameter and the pointer bound to \p In is the second.
///
/// \param CGM Module the function is created in.
/// \param Ty Type of the reduction.
/// \param CombinerInitializer Expression emitted as the function body.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner Selects the name ".omp_combiner." (true) or
/// ".omp_initializer." (false).
/// \return The newly created, internal-linkage, always-inline function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  auto &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy);
  // The out parameter is pushed first, so it is argument 0.
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  CGF.EmitIgnoredExpr(CombinerInitializer);
  // Run the scope's cleanups explicitly before the function is finished.
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
|
|
|
|
|
|
|
|
/// \brief Emit the combiner and, if present, the initializer function for the
/// 'declare reduction' construct \p D and record them in UDRMap.
///
/// Does nothing if \p D already has an entry in UDRMap.
///
/// \param CGF Function the declaration appears in, or null. When non-null,
/// \p D is also appended to the FunctionUDRMap entry for CGF->CurFn.
/// \param D The declare reduction declaration to emit.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  auto &C = CGM.getContext();
  // Look up the "omp_in"/"omp_out" identifiers only once and reuse them.
  if (!In || !Out) {
    In = &C.Idents.get("omp_in");
    Out = &C.Idents.get("omp_out");
  }
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
      cast<VarDecl>(D->lookup(Out).front()),
      /*IsCombiner=*/true);
  // The initializer is optional; it stays null when D has none.
  llvm::Function *Initializer = nullptr;
  if (auto *Init = D->getInitializer()) {
    // Same lazy lookup for the "omp_priv"/"omp_orig" identifiers.
    if (!Priv || !Orig) {
      Priv = &C.Idents.get("omp_priv");
      Orig = &C.Idents.get("omp_orig");
    }
    // omp_orig is passed as the 'In' variable and omp_priv as 'Out'.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
        cast<VarDecl>(D->lookup(Priv).front()),
        /*IsCombiner=*/false);
  }
  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
|
|
|
|
|
2016-03-17 18:19:46 +08:00
|
|
|
std::pair<llvm::Function *, llvm::Function *>
|
|
|
|
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
|
|
|
|
auto I = UDRMap.find(D);
|
|
|
|
if (I != UDRMap.end())
|
|
|
|
return I->second;
|
|
|
|
emitUserDefinedReduction(/*CGF=*/nullptr, D);
|
|
|
|
return UDRMap.lookup(D);
|
|
|
|
}
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
// Layout information for ident_t.
|
|
|
|
/// \brief Alignment used for ident_t objects: the target's pointer alignment.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  return CGM.getPointerAlign();
}
|
|
|
|
/// \brief Size of ident_t: 16 bytes for the four leading i32 fields plus one
/// pointer for the trailing psource field.
static CharUnits getIdentSize(CodeGenModule &CGM) {
  // NOTE(review): this checks 4 * pointer-size against the pointer alignment,
  // not the 16-byte i32 prefix that precedes psource. Confirm which was
  // intended before relying on it as a layout check.
  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
|
2016-02-19 18:38:26 +08:00
|
|
|
/// \brief Byte offset of \p Field within ident_t, computed as four bytes per
/// preceding field.
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
  // All the fields except the last are i32, so this works beautifully.
  return unsigned(Field) * CharUnits::fromQuantity(4);
}
|
|
|
|
/// \brief Build a struct GEP addressing field \p Field of the ident_t object
/// at \p Addr.
///
/// \param CGF Function whose builder emits the GEP.
/// \param Addr Address of the ident_t object.
/// \param Field Index of the field to address.
/// \param Name Optional name for the resulting value.
/// \return Address of the requested field.
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
                                   IdentFieldIndex Field,
                                   const llvm::Twine &Name = "") {
  auto Offset = getOffsetOfIdentField(Field);
  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}
|
|
|
|
|
2016-03-04 04:34:23 +08:00
|
|
|
/// \brief Emit the outlined function for the captured statement associated
/// with directive \p D.
///
/// \param D Directive whose associated statement (a CapturedStmt) is outlined.
/// \param ThreadIDVar Thread id variable; must have pointer type
/// (kmp_int32 *).
/// \param InnermostKind Directive kind forwarded to the region info.
/// \param CodeGen Callback that emits the region body.
/// \return The generated outlined function.
llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  CodeGenFunction CGF(CGM, true);
  // Only the parallel, parallel sections and parallel for directives are
  // queried for a cancel; for any other directive HasCancel stays false.
  bool HasCancel = false;
  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
|
|
|
|
|
2015-07-03 17:56:58 +08:00
|
|
|
/// \brief Emit the outlined function for the captured statement associated
/// with the task directive \p D.
///
/// \param D Directive to outline; it is cast to OMPTaskDirective, so it must
/// be one.
/// \param ThreadIDVar Thread id variable; must NOT have pointer type
/// (kmp_int32 for tasks).
/// \param InnermostKind Directive kind forwarded to the region info.
/// \param CodeGen Callback that emits the region body.
/// \return The generated outlined function.
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        cast<OMPTaskDirective>(D).hasCancel());
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateCapturedStmtFunction(*CS);
}
|
|
|
|
|
2016-02-19 18:38:26 +08:00
|
|
|
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CharUnits Align = getIdentAlign(CGM);
|
2014-05-07 14:18:01 +08:00
|
|
|
llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
|
2014-05-06 18:08:46 +08:00
|
|
|
if (!Entry) {
|
|
|
|
if (!DefaultOpenMPPSource) {
|
|
|
|
// Initialize default location for psource field of ident_t structure of
|
|
|
|
// all ident_t objects. Format is ";file;function;line;column;;".
|
|
|
|
// Taken from
|
|
|
|
// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
|
|
|
|
DefaultOpenMPPSource =
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
|
2014-05-06 18:08:46 +08:00
|
|
|
DefaultOpenMPPSource =
|
|
|
|
llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
|
|
|
|
}
|
2014-10-08 22:01:46 +08:00
|
|
|
auto DefaultOpenMPLocation = new llvm::GlobalVariable(
|
|
|
|
CGM.getModule(), IdentTy, /*isConstant*/ true,
|
|
|
|
llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
|
2014-05-06 18:08:46 +08:00
|
|
|
DefaultOpenMPLocation->setUnnamedAddr(true);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
DefaultOpenMPLocation->setAlignment(Align.getQuantity());
|
2014-05-06 18:08:46 +08:00
|
|
|
|
|
|
|
llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
|
2014-06-18 15:08:49 +08:00
|
|
|
llvm::Constant *Values[] = {Zero,
|
|
|
|
llvm::ConstantInt::get(CGM.Int32Ty, Flags),
|
|
|
|
Zero, Zero, DefaultOpenMPPSource};
|
2014-05-06 18:08:46 +08:00
|
|
|
llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
|
|
|
|
DefaultOpenMPLocation->setInitializer(Init);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
return Address(Entry, Align);
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
|
|
|
|
SourceLocation Loc,
|
2016-02-19 18:38:26 +08:00
|
|
|
unsigned Flags) {
|
|
|
|
Flags |= OMP_IDENT_KMPC;
|
2014-05-06 18:08:46 +08:00
|
|
|
// If no debug info is generated - return global default location.
|
2016-02-02 19:06:51 +08:00
|
|
|
if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
|
2014-05-06 18:08:46 +08:00
|
|
|
Loc.isInvalid())
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
return getOrCreateDefaultLocation(Flags).getPointer();
|
2014-05-06 18:08:46 +08:00
|
|
|
|
|
|
|
assert(CGF.CurFn && "No function in current CodeGenFunction.");
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address LocValue = Address::invalid();
|
2014-12-03 20:11:24 +08:00
|
|
|
auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
|
|
|
|
if (I != OpenMPLocThreadIDMap.end())
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
|
|
|
|
|
2014-12-15 15:07:06 +08:00
|
|
|
// OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
|
|
|
|
// GetOpenMPThreadID was called before this routine.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
if (!LocValue.isValid()) {
|
2014-05-07 14:18:01 +08:00
|
|
|
// Generate "ident_t .kmpc_loc.addr;"
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
|
|
|
|
".kmpc_loc.addr");
|
2014-10-10 20:19:54 +08:00
|
|
|
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Elem.second.DebugLoc = AI.getPointer();
|
2014-05-06 18:08:46 +08:00
|
|
|
LocValue = AI;
|
|
|
|
|
|
|
|
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
|
|
|
|
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
|
2015-02-25 16:32:46 +08:00
|
|
|
CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGM.getSize(getIdentSize(CGF.CGM)));
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// char **psource = &.kmpc_loc_<flags>.addr.psource;
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
|
2014-05-06 18:08:46 +08:00
|
|
|
|
2014-05-30 13:48:40 +08:00
|
|
|
auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
|
|
|
|
if (OMPDebugLoc == nullptr) {
|
|
|
|
SmallString<128> Buffer2;
|
|
|
|
llvm::raw_svector_ostream OS2(Buffer2);
|
|
|
|
// Build debug location
|
|
|
|
PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
|
|
|
|
OS2 << ";" << PLoc.getFilename() << ";";
|
|
|
|
if (const FunctionDecl *FD =
|
|
|
|
dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
|
|
|
|
OS2 << FD->getQualifiedNameAsString();
|
|
|
|
}
|
|
|
|
OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
|
|
|
|
OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
|
|
|
|
OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
|
|
|
// *psource = ";<File>;<Function>;<Line>;<Column>;;";
|
2014-05-30 13:48:40 +08:00
|
|
|
CGF.Builder.CreateStore(OMPDebugLoc, PSource);
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
// Our callers always pass this to a runtime function, so for
|
|
|
|
// convenience, go ahead and return a naked pointer.
|
|
|
|
return LocValue.getPointer();
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// Returns the value holding the OpenMP global thread id for the function
/// currently being emitted by \p CGF.
///
/// The lookup order is:
///   1. a value already recorded for CGF.CurFn in OpenMPLocThreadIDMap;
///   2. a load of the region's thread-id variable, when CGF is emitting an
///      OpenMP region that provides one;
///   3. a call to __kmpc_global_thread_num emitted at the alloca insertion
///      point.
///
/// \param CGF Code generation state of the current function.
/// \param Loc Source location used for the emitted load / runtime call.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Reuse the thread id recorded for this function, if there is one.
  auto CachedIt = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (CachedIt != OpenMPLocThreadIDMap.end())
    if (llvm::Value *Cached = CachedIt->second.ThreadID)
      return Cached;

  // Check whether this is an outlined function that received the thread id
  // through a dedicated variable; if so, simply load it.
  auto *RegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (RegionInfo && RegionInfo->getThreadIDVariable()) {
    auto ThreadIDLVal = RegionInfo->getThreadIDVariableLValue(CGF);
    llvm::Value *Loaded =
        CGF.EmitLoadOfLValue(ThreadIDLVal, Loc).getScalarVal();
    // Only record the load for reuse when it was emitted into the same block
    // as the alloca insertion point; otherwise hand it back without caching.
    if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent())
      OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn).second.ThreadID =
          Loaded;
    return Loaded;
  }

  // Not an outlined function region: emit a call to
  //   kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  // at the alloca insertion point and record the result for the whole
  // function. The guard restores the builder's insertion point on exit.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
  llvm::Value *RuntimeThreadID =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                          emitUpdateLocation(CGF, Loc));
  OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn).second.ThreadID =
      RuntimeThreadID;
  return RuntimeThreadID;
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// Discards the state this runtime recorded for CGF.CurFn: its entry in
/// OpenMPLocThreadIDMap and, when one exists, its FunctionUDRMap entry
/// together with the UDRMap entry of every declaration listed there.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  auto *FinishedFn = CGF.CurFn;

  // Erasing by key does nothing when no entry was recorded for the function.
  OpenMPLocThreadIDMap.erase(FinishedFn);

  auto UDRIt = FunctionUDRMap.find(FinishedFn);
  if (UDRIt == FunctionUDRMap.end())
    return;

  // Remove every declaration registered for this function before dropping
  // the function's own list.
  for (auto *D : UDRIt->second)
    UDRMap.erase(D);
  FunctionUDRMap.erase(UDRIt);
}
|
|
|
|
|
|
|
|
/// Returns the pointer-to-IdentTy type in address space 0.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  llvm::PointerType *IdentPtrTy =
      llvm::PointerType::get(IdentTy, /*AddressSpace=*/0);
  return IdentPtrTy;
}
|
|
|
|
|
|
|
|
/// Returns the pointer-to-Kmpc_MicroTy type in address space 0.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  llvm::PointerType *MicroPtrTy =
      llvm::PointerType::get(Kmpc_MicroTy, /*AddressSpace=*/0);
  return MicroPtrTy;
}
|
|
|
|
|
|
|
|
llvm::Constant *
|
2016-02-19 18:38:26 +08:00
|
|
|
CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
|
2014-05-06 18:08:46 +08:00
|
|
|
llvm::Constant *RTLFn = nullptr;
|
2016-02-19 18:38:26 +08:00
|
|
|
switch (static_cast<OpenMPRTLFunction>(Function)) {
|
2014-05-06 18:08:46 +08:00
|
|
|
case OMPRTL__kmpc_fork_call: {
|
|
|
|
// Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
|
|
|
|
// microtask, ...);
|
2014-06-18 15:08:49 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
getKmpc_MicroPointerTy()};
|
2014-05-06 18:08:46 +08:00
|
|
|
llvm::FunctionType *FnTy =
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
|
2014-05-06 18:08:46 +08:00
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_global_thread_num: {
|
|
|
|
// Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
|
2014-06-18 15:08:49 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
|
2014-05-06 18:08:46 +08:00
|
|
|
llvm::FunctionType *FnTy =
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
2014-05-06 18:08:46 +08:00
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
|
|
|
|
break;
|
|
|
|
}
|
2014-11-11 12:05:39 +08:00
|
|
|
case OMPRTL__kmpc_threadprivate_cached: {
|
|
|
|
// Build void *__kmpc_threadprivate_cached(ident_t *loc,
|
|
|
|
// kmp_int32 global_tid, void *data, size_t size, void ***cache);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy, CGM.SizeTy,
|
|
|
|
CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
|
|
|
|
break;
|
|
|
|
}
|
2014-09-22 18:01:53 +08:00
|
|
|
case OMPRTL__kmpc_critical: {
|
2014-09-22 20:32:31 +08:00
|
|
|
// Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_critical_name *crit);
|
2014-09-22 18:01:53 +08:00
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
|
|
|
|
break;
|
|
|
|
}
|
2015-12-15 18:55:09 +08:00
|
|
|
case OMPRTL__kmpc_critical_with_hint: {
|
|
|
|
// Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_critical_name *crit, uintptr_t hint);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy),
|
|
|
|
CGM.IntPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
|
|
|
|
break;
|
|
|
|
}
|
2014-11-11 12:05:39 +08:00
|
|
|
case OMPRTL__kmpc_threadprivate_register: {
|
|
|
|
// Build void __kmpc_threadprivate_register(ident_t *, void *data,
|
|
|
|
// kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
|
|
|
|
// typedef void *(*kmpc_ctor)(void *);
|
|
|
|
auto KmpcCtorTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
|
|
|
|
/*isVarArg*/ false)->getPointerTo();
|
|
|
|
// typedef void *(*kmpc_cctor)(void *, void *);
|
|
|
|
llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
|
|
|
|
auto KmpcCopyCtorTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
|
|
|
|
/*isVarArg*/ false)->getPointerTo();
|
|
|
|
// typedef void (*kmpc_dtor)(void *);
|
|
|
|
auto KmpcDtorTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
|
|
|
|
->getPointerTo();
|
|
|
|
llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
|
|
|
|
KmpcCopyCtorTy, KmpcDtorTy};
|
|
|
|
auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
|
|
|
|
/*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
|
|
|
|
break;
|
|
|
|
}
|
2014-09-22 18:01:53 +08:00
|
|
|
case OMPRTL__kmpc_end_critical: {
|
2014-09-22 20:32:31 +08:00
|
|
|
// Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_critical_name *crit);
|
2014-09-22 18:01:53 +08:00
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
|
|
|
|
break;
|
|
|
|
}
|
2014-12-05 12:09:23 +08:00
|
|
|
case OMPRTL__kmpc_cancel_barrier: {
|
|
|
|
// Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
|
|
|
|
// global_tid);
|
2014-10-08 22:01:46 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
2014-12-05 12:09:23 +08:00
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
|
2014-10-08 22:01:46 +08:00
|
|
|
break;
|
|
|
|
}
|
2015-07-03 17:56:58 +08:00
|
|
|
case OMPRTL__kmpc_barrier: {
|
2015-07-06 13:50:32 +08:00
|
|
|
// Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
|
2015-07-03 17:56:58 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
|
|
|
|
break;
|
|
|
|
}
|
2014-12-15 15:07:06 +08:00
|
|
|
case OMPRTL__kmpc_for_static_fini: {
|
|
|
|
// Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
|
|
|
|
break;
|
|
|
|
}
|
2014-10-13 16:23:51 +08:00
|
|
|
case OMPRTL__kmpc_push_num_threads: {
|
|
|
|
// Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_int32 num_threads)
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
case OMPRTL__kmpc_serialized_parallel: {
|
|
|
|
// Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
|
|
|
|
// global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_serialized_parallel: {
|
|
|
|
// Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
|
|
|
|
// global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
|
|
|
|
break;
|
|
|
|
}
|
2014-11-20 12:34:54 +08:00
|
|
|
case OMPRTL__kmpc_flush: {
|
2015-02-24 20:55:09 +08:00
|
|
|
// Build void __kmpc_flush(ident_t *loc);
|
2014-11-20 12:34:54 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
|
|
|
|
llvm::FunctionType *FnTy =
|
2015-02-24 20:55:09 +08:00
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
2014-11-20 12:34:54 +08:00
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
|
|
|
|
break;
|
|
|
|
}
|
2014-12-04 15:23:53 +08:00
|
|
|
case OMPRTL__kmpc_master: {
|
|
|
|
// Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_master: {
|
|
|
|
// Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
|
|
|
|
break;
|
|
|
|
}
|
2015-02-05 13:57:51 +08:00
|
|
|
case OMPRTL__kmpc_omp_taskyield: {
|
|
|
|
// Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
|
|
|
|
// int end_part);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
|
|
|
|
break;
|
|
|
|
}
|
2015-02-05 14:35:41 +08:00
|
|
|
case OMPRTL__kmpc_single: {
|
|
|
|
// Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_single: {
|
|
|
|
// Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
case OMPRTL__kmpc_omp_task_alloc: {
|
|
|
|
// Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
|
|
|
|
// kmp_routine_entry_t *task_entry);
|
|
|
|
assert(KmpRoutineEntryPtrTy != nullptr &&
|
|
|
|
"Type kmp_routine_entry_t must be created.");
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
|
|
|
|
CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
|
|
|
|
// Return void * and then cast to particular kmp_task_t type.
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_omp_task: {
|
|
|
|
// Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
|
|
|
|
// *new_task);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
|
|
|
|
break;
|
|
|
|
}
|
2015-03-23 14:18:07 +08:00
|
|
|
case OMPRTL__kmpc_copyprivate: {
|
|
|
|
// Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
|
2015-04-30 11:47:32 +08:00
|
|
|
// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
|
2015-03-23 14:18:07 +08:00
|
|
|
// kmp_int32 didit);
|
|
|
|
llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
|
|
|
|
auto *CpyFnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
|
2015-04-30 11:47:32 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
|
2015-03-23 14:18:07 +08:00
|
|
|
CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
|
|
|
|
CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
case OMPRTL__kmpc_reduce: {
|
|
|
|
// Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
|
|
|
|
// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
|
|
|
|
llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
|
|
|
|
auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
|
|
|
|
/*isVarArg=*/false);
|
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
|
|
|
|
CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_reduce_nowait: {
|
|
|
|
// Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
|
|
|
|
// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
|
|
|
|
// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
|
|
|
|
// *lck);
|
|
|
|
llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
|
|
|
|
auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
|
|
|
|
/*isVarArg=*/false);
|
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
|
|
|
|
CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_reduce: {
|
|
|
|
// Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_critical_name *lck);
|
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_reduce_nowait: {
|
|
|
|
// Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_critical_name *lck);
|
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn =
|
|
|
|
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
case OMPRTL__kmpc_omp_task_begin_if0: {
|
|
|
|
// Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_task_t *new_task);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn =
|
|
|
|
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_omp_task_complete_if0: {
|
|
|
|
// Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_task_t *new_task);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy,
|
|
|
|
/*Name=*/"__kmpc_omp_task_complete_if0");
|
|
|
|
break;
|
|
|
|
}
|
2015-04-22 19:15:40 +08:00
|
|
|
case OMPRTL__kmpc_ordered: {
|
|
|
|
// Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_ordered: {
|
2015-06-18 20:14:09 +08:00
|
|
|
// Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
|
2015-04-22 19:15:40 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
|
|
|
|
break;
|
|
|
|
}
|
2015-04-27 13:22:09 +08:00
|
|
|
case OMPRTL__kmpc_omp_taskwait: {
|
|
|
|
// Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
|
|
|
|
break;
|
|
|
|
}
|
2015-06-18 20:14:09 +08:00
|
|
|
case OMPRTL__kmpc_taskgroup: {
|
|
|
|
// Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_taskgroup: {
|
|
|
|
// Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
|
|
|
|
break;
|
|
|
|
}
|
2015-06-18 21:40:03 +08:00
|
|
|
case OMPRTL__kmpc_push_proc_bind: {
|
|
|
|
// Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// int proc_bind)
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
case OMPRTL__kmpc_omp_task_with_deps: {
|
|
|
|
// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
|
|
|
|
// kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
|
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn =
|
|
|
|
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_omp_wait_deps: {
|
|
|
|
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
|
|
|
|
// kmp_depend_info_t *noalias_dep_list);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.Int32Ty, CGM.VoidPtrTy,
|
|
|
|
CGM.Int32Ty, CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
|
|
|
|
break;
|
|
|
|
}
|
2015-07-02 12:17:07 +08:00
|
|
|
case OMPRTL__kmpc_cancellationpoint: {
|
|
|
|
// Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
|
|
|
|
// global_tid, kmp_int32 cncl_kind)
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
|
|
|
|
break;
|
|
|
|
}
|
2015-07-06 13:50:32 +08:00
|
|
|
case OMPRTL__kmpc_cancel: {
|
|
|
|
// Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_int32 cncl_kind)
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
|
|
|
|
break;
|
|
|
|
}
|
2016-03-04 04:34:23 +08:00
|
|
|
case OMPRTL__kmpc_push_num_teams: {
|
|
|
|
// Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_int32 num_teams, kmp_int32 num_threads)
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
|
|
|
|
CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_fork_teams: {
|
|
|
|
// Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
|
|
|
|
// microtask, ...);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
getKmpc_MicroPointerTy()};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
|
|
|
|
break;
|
|
|
|
}
|
2015-10-03 00:14:20 +08:00
|
|
|
case OMPRTL__tgt_target: {
|
|
|
|
// Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
|
|
|
|
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
|
|
|
|
// *arg_types);
|
|
|
|
llvm::Type *TypeParams[] = {CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy,
|
|
|
|
CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.SizeTy->getPointerTo(),
|
|
|
|
CGM.Int32Ty->getPointerTo()};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
|
|
|
|
break;
|
|
|
|
}
|
2016-03-04 00:20:23 +08:00
|
|
|
case OMPRTL__tgt_target_teams: {
|
|
|
|
// Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
|
|
|
|
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
|
|
|
|
// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
|
|
|
|
llvm::Type *TypeParams[] = {CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy,
|
|
|
|
CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.SizeTy->getPointerTo(),
|
|
|
|
CGM.Int32Ty->getPointerTo(),
|
|
|
|
CGM.Int32Ty,
|
|
|
|
CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
|
|
|
|
break;
|
|
|
|
}
|
2016-01-06 21:42:12 +08:00
|
|
|
case OMPRTL__tgt_register_lib: {
|
|
|
|
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
|
|
|
|
QualType ParamTy =
|
|
|
|
CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
|
|
|
|
llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__tgt_unregister_lib: {
|
|
|
|
// Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
|
|
|
|
QualType ParamTy =
|
|
|
|
CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
|
|
|
|
llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
|
|
|
|
break;
|
|
|
|
}
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
2016-02-19 18:38:26 +08:00
|
|
|
assert(RTLFn && "Unable to find OpenMP runtime function");
|
2014-05-06 18:08:46 +08:00
|
|
|
return RTLFn;
|
|
|
|
}
|
2014-09-22 18:01:53 +08:00
|
|
|
|
2015-03-13 18:38:23 +08:00
|
|
|
/// Declare the __kmpc_for_static_init_{4,4u,8,8u} runtime entry point that
/// corresponds to an induction variable of \p IVSize bits and signedness
/// \p IVSigned. \p IVSize must be 32 or 64.
llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
                                                             bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  // The name suffix is the IV width in bytes, followed by 'u' when unsigned.
  const char *FnName;
  if (Is32Bit)
    FnName = IVSigned ? "__kmpc_for_static_init_4"
                      : "__kmpc_for_static_init_4u";
  else
    FnName = IVSigned ? "__kmpc_for_static_init_8"
                      : "__kmpc_for_static_init_8u";

  // Bounds and stride are passed by pointer; incr and chunk by value.
  llvm::Type *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *IVPtrTy = llvm::PointerType::getUnqual(IVTy);
  llvm::Type *ArgTys[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      IVPtrTy,                                   // p_lower
      IVPtrTy,                                   // p_upper
      IVPtrTy,                                   // p_stride
      IVTy,                                      // incr
      IVTy                                       // chunk
  };
  return CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(CGM.VoidTy, ArgTys, /*isVarArg=*/false), FnName);
}
|
|
|
|
|
2015-03-12 21:37:50 +08:00
|
|
|
/// Declare the __kmpc_dispatch_init_{4,4u,8,8u} runtime entry point that
/// corresponds to an induction variable of \p IVSize bits and signedness
/// \p IVSigned. \p IVSize must be 32 or 64.
llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  // The name suffix is the IV width in bytes, followed by 'u' when unsigned.
  const char *FnName;
  if (Is32Bit)
    FnName = IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u";
  else
    FnName = IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u";

  // All loop parameters are passed by value in the IV-sized integer type.
  llvm::Type *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *ArgTys[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
      CGM.Int32Ty,           // schedtype
      IVTy,                  // lower
      IVTy,                  // upper
      IVTy,                  // stride
      IVTy                   // chunk
  };
  return CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(CGM.VoidTy, ArgTys, /*isVarArg=*/false), FnName);
}
|
|
|
|
|
2015-04-22 19:15:40 +08:00
|
|
|
/// Declare the __kmpc_dispatch_fini_{4,4u,8,8u} runtime entry point that
/// corresponds to an induction variable of \p IVSize bits and signedness
/// \p IVSigned. \p IVSize must be 32 or 64.
llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");

  // The name suffix is the IV width in bytes, followed by 'u' when unsigned.
  const char *FnName;
  if (IVSize == 32)
    FnName = IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u";
  else
    FnName = IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u";

  // Only the name depends on the IV; the parameter list is the same for all
  // four variants.
  llvm::Type *ArgTys[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  return CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(CGM.VoidTy, ArgTys, /*isVarArg=*/false), FnName);
}
|
|
|
|
|
2015-03-12 21:37:50 +08:00
|
|
|
/// Declare the __kmpc_dispatch_next_{4,4u,8,8u} runtime entry point that
/// corresponds to an induction variable of \p IVSize bits and signedness
/// \p IVSigned. \p IVSize must be 32 or 64. The declared function returns a
/// 32-bit integer.
llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  // The name suffix is the IV width in bytes, followed by 'u' when unsigned.
  const char *FnName;
  if (Is32Bit)
    FnName = IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u";
  else
    FnName = IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u";

  // Bounds and stride are passed as pointers to IV-sized integers.
  llvm::Type *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *IVPtrTy = llvm::PointerType::getUnqual(IVTy);
  llvm::Type *ArgTys[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      IVPtrTy,                                   // p_lower
      IVPtrTy,                                   // p_upper
      IVPtrTy                                    // p_stride
  };
  return CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(CGM.Int32Ty, ArgTys, /*isVarArg=*/false),
      FnName);
}
|
|
|
|
|
2014-11-11 12:05:39 +08:00
|
|
|
/// Return the internal variable named "<mangled name of VD>.cache.", creating
/// it on first request. Must not be called when OpenMPUseTLS is enabled and
/// the target supports TLS.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // The cache variable is keyed by the variable's mangled name; the lookup
  // creates the entry lazily if it does not exist yet.
  StringRef MangledName = CGM.getMangledName(VD);
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(MangledName) + ".cache.");
}
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
|
|
|
|
const VarDecl *VD,
|
|
|
|
Address VDAddr,
|
|
|
|
SourceLocation Loc) {
|
2015-07-14 06:54:53 +08:00
|
|
|
if (CGM.getLangOpts().OpenMPUseTLS &&
|
|
|
|
CGM.getContext().getTargetInfo().isTLSSupported())
|
|
|
|
return VDAddr;
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto VarTy = VDAddr.getElementType();
|
2015-02-25 16:32:46 +08:00
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
|
|
|
|
CGM.Int8PtrTy),
|
2014-11-11 12:05:39 +08:00
|
|
|
CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
|
|
|
|
getOrCreateThreadPrivateCache(VD)};
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
return Address(CGF.EmitRuntimeCall(
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
|
|
|
|
VDAddr.getAlignment());
|
2014-11-11 12:05:39 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
void CGOpenMPRuntime::emitThreadPrivateVarInit(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
|
2014-11-11 12:05:39 +08:00
|
|
|
llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
|
|
|
|
// Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
|
|
|
|
// library.
|
2015-02-25 16:32:46 +08:00
|
|
|
auto OMPLoc = emitUpdateLocation(CGF, Loc);
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
|
2014-11-11 12:05:39 +08:00
|
|
|
OMPLoc);
|
|
|
|
// Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
|
|
|
|
// to register constructor/destructor for variable.
|
|
|
|
llvm::Value *Args[] = {OMPLoc,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
|
|
|
|
CGM.VoidPtrTy),
|
2014-11-11 12:05:39 +08:00
|
|
|
Ctor, CopyCtor, Dtor};
|
2014-12-03 20:11:24 +08:00
|
|
|
CGF.EmitRuntimeCall(
|
2015-02-25 16:32:46 +08:00
|
|
|
createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
|
2014-11-11 12:05:39 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
const VarDecl *VD, Address VDAddr, SourceLocation Loc,
|
2014-11-11 12:05:39 +08:00
|
|
|
bool PerformInit, CodeGenFunction *CGF) {
|
2015-07-14 06:54:53 +08:00
|
|
|
if (CGM.getLangOpts().OpenMPUseTLS &&
|
|
|
|
CGM.getContext().getTargetInfo().isTLSSupported())
|
|
|
|
return nullptr;
|
|
|
|
|
2014-11-11 12:05:39 +08:00
|
|
|
VD = VD->getDefinition(CGM.getContext());
|
|
|
|
if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
|
|
|
|
ThreadPrivateWithDefinition.insert(VD);
|
|
|
|
QualType ASTTy = VD->getType();
|
|
|
|
|
|
|
|
llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
|
|
|
|
auto Init = VD->getAnyInitializer();
|
|
|
|
if (CGM.getLangOpts().CPlusPlus && PerformInit) {
|
|
|
|
// Generate function that re-emits the declaration's initializer into the
|
|
|
|
// threadprivate copy of the variable VD
|
|
|
|
CodeGenFunction CtorCGF(CGM);
|
|
|
|
FunctionArgList Args;
|
|
|
|
ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
|
|
|
|
/*Id=*/nullptr, CGM.getContext().VoidPtrTy);
|
|
|
|
Args.push_back(&Dst);
|
|
|
|
|
2016-03-11 12:30:31 +08:00
|
|
|
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
|
|
|
|
CGM.getContext().VoidPtrTy, Args);
|
2014-11-11 12:05:39 +08:00
|
|
|
auto FTy = CGM.getTypes().GetFunctionType(FI);
|
|
|
|
auto Fn = CGM.CreateGlobalInitOrDestructFunction(
|
2015-10-31 09:28:07 +08:00
|
|
|
FTy, ".__kmpc_global_ctor_.", FI, Loc);
|
2014-11-11 12:05:39 +08:00
|
|
|
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
|
|
|
|
Args, SourceLocation());
|
|
|
|
auto ArgVal = CtorCGF.EmitLoadOfScalar(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
|
2014-11-11 12:05:39 +08:00
|
|
|
CGM.getContext().VoidPtrTy, Dst.getLocation());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address Arg = Address(ArgVal, VDAddr.getAlignment());
|
|
|
|
Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
|
|
|
|
CtorCGF.ConvertTypeForMem(ASTTy));
|
2014-11-11 12:05:39 +08:00
|
|
|
CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
|
|
|
|
/*IsInitializer=*/true);
|
|
|
|
ArgVal = CtorCGF.EmitLoadOfScalar(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
|
2014-11-11 12:05:39 +08:00
|
|
|
CGM.getContext().VoidPtrTy, Dst.getLocation());
|
|
|
|
CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
|
|
|
|
CtorCGF.FinishFunction();
|
|
|
|
Ctor = Fn;
|
|
|
|
}
|
|
|
|
if (VD->getType().isDestructedType() != QualType::DK_none) {
|
|
|
|
// Generate function that emits destructor call for the threadprivate copy
|
|
|
|
// of the variable VD
|
|
|
|
CodeGenFunction DtorCGF(CGM);
|
|
|
|
FunctionArgList Args;
|
|
|
|
ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
|
|
|
|
/*Id=*/nullptr, CGM.getContext().VoidPtrTy);
|
|
|
|
Args.push_back(&Dst);
|
|
|
|
|
2016-03-11 12:30:31 +08:00
|
|
|
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
|
|
|
|
CGM.getContext().VoidTy, Args);
|
2014-11-11 12:05:39 +08:00
|
|
|
auto FTy = CGM.getTypes().GetFunctionType(FI);
|
|
|
|
auto Fn = CGM.CreateGlobalInitOrDestructFunction(
|
2015-10-31 09:28:07 +08:00
|
|
|
FTy, ".__kmpc_global_dtor_.", FI, Loc);
|
2014-11-11 12:05:39 +08:00
|
|
|
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
|
|
|
|
SourceLocation());
|
|
|
|
auto ArgVal = DtorCGF.EmitLoadOfScalar(
|
|
|
|
DtorCGF.GetAddrOfLocalVar(&Dst),
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
/*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
|
|
|
|
DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
|
2014-11-11 12:05:39 +08:00
|
|
|
DtorCGF.getDestroyer(ASTTy.isDestructedType()),
|
|
|
|
DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
|
|
|
|
DtorCGF.FinishFunction();
|
|
|
|
Dtor = Fn;
|
|
|
|
}
|
|
|
|
// Do not emit init function if it is not required.
|
|
|
|
if (!Ctor && !Dtor)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
|
|
|
|
auto CopyCtorTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
|
|
|
|
/*isVarArg=*/false)->getPointerTo();
|
|
|
|
// Copying constructor for the threadprivate variable.
|
|
|
|
// Must be NULL - reserved by runtime, but currently it requires that this
|
|
|
|
// parameter is always NULL. Otherwise it fires assertion.
|
|
|
|
CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
|
|
|
|
if (Ctor == nullptr) {
|
|
|
|
auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
|
|
|
|
/*isVarArg=*/false)->getPointerTo();
|
|
|
|
Ctor = llvm::Constant::getNullValue(CtorTy);
|
|
|
|
}
|
|
|
|
if (Dtor == nullptr) {
|
|
|
|
auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
|
|
|
|
/*isVarArg=*/false)->getPointerTo();
|
|
|
|
Dtor = llvm::Constant::getNullValue(DtorTy);
|
|
|
|
}
|
|
|
|
if (!CGF) {
|
|
|
|
auto InitFunctionTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
|
|
|
|
auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
|
2015-10-31 09:28:07 +08:00
|
|
|
InitFunctionTy, ".__omp_threadprivate_init_.",
|
|
|
|
CGM.getTypes().arrangeNullaryFunction());
|
2014-11-11 12:05:39 +08:00
|
|
|
CodeGenFunction InitCGF(CGM);
|
|
|
|
FunctionArgList ArgList;
|
|
|
|
InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
|
|
|
|
CGM.getTypes().arrangeNullaryFunction(), ArgList,
|
|
|
|
Loc);
|
2015-02-25 16:32:46 +08:00
|
|
|
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
|
2014-11-11 12:05:39 +08:00
|
|
|
InitCGF.FinishFunction();
|
|
|
|
return InitFunction;
|
|
|
|
}
|
2015-02-25 16:32:46 +08:00
|
|
|
emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
|
2014-11-11 12:05:39 +08:00
|
|
|
}
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
|
|
|
|
/// function. Here is the logic:
|
|
|
|
/// if (Cond) {
|
|
|
|
/// ThenGen();
|
|
|
|
/// } else {
|
|
|
|
/// ElseGen();
|
|
|
|
/// }
|
|
|
|
static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
|
|
|
|
const RegionCodeGenTy &ThenGen,
|
|
|
|
const RegionCodeGenTy &ElseGen) {
|
|
|
|
CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
|
|
|
|
|
|
|
|
// If the condition constant folds and can be elided, try to avoid emitting
|
|
|
|
// the condition and the dead arm of the if/else.
|
|
|
|
bool CondConstant;
|
|
|
|
if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
|
2016-03-28 20:58:34 +08:00
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
|
|
|
if (CondConstant) {
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
ThenGen(CGF);
|
2016-03-28 20:58:34 +08:00
|
|
|
} else {
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
ElseGen(CGF);
|
2016-03-28 20:58:34 +08:00
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
return;
|
|
|
|
}
|
2014-10-08 22:01:46 +08:00
|
|
|
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
// Otherwise, the condition did not fold, or we couldn't elide it. Just
|
|
|
|
// emit the conditional branch.
|
|
|
|
auto ThenBlock = CGF.createBasicBlock("omp_if.then");
|
|
|
|
auto ElseBlock = CGF.createBasicBlock("omp_if.else");
|
|
|
|
auto ContBlock = CGF.createBasicBlock("omp_if.end");
|
|
|
|
CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
// Emit the 'then' code.
|
|
|
|
CGF.EmitBlock(ThenBlock);
|
2016-03-28 20:58:34 +08:00
|
|
|
{
|
|
|
|
CodeGenFunction::RunCleanupsScope ThenScope(CGF);
|
|
|
|
ThenGen(CGF);
|
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
CGF.EmitBranch(ContBlock);
|
|
|
|
// Emit the 'else' code if present.
|
2016-03-28 20:58:34 +08:00
|
|
|
{
|
|
|
|
// There is no need to emit line number for unconditional branch.
|
|
|
|
auto NL = ApplyDebugLocation::CreateEmpty(CGF);
|
|
|
|
CGF.EmitBlock(ElseBlock);
|
|
|
|
}
|
|
|
|
{
|
|
|
|
CodeGenFunction::RunCleanupsScope ThenScope(CGF);
|
|
|
|
ElseGen(CGF);
|
|
|
|
}
|
|
|
|
{
|
|
|
|
// There is no need to emit line number for unconditional branch.
|
|
|
|
auto NL = ApplyDebugLocation::CreateEmpty(CGF);
|
|
|
|
CGF.EmitBranch(ContBlock);
|
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
// Emit the continuation block for code after the if.
|
|
|
|
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
|
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
|
llvm::Value *OutlinedFn,
|
2015-09-10 16:12:02 +08:00
|
|
|
ArrayRef<llvm::Value *> CapturedVars,
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
const Expr *IfCond) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
auto *RTLoc = emitUpdateLocation(CGF, Loc);
|
2016-03-28 20:58:34 +08:00
|
|
|
auto &&ThenGen = [this, OutlinedFn, CapturedVars,
|
|
|
|
RTLoc](CodeGenFunction &CGF) {
|
2015-09-10 16:12:02 +08:00
|
|
|
// Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
|
|
|
|
llvm::Value *Args[] = {
|
|
|
|
RTLoc,
|
|
|
|
CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
|
2016-03-28 20:58:34 +08:00
|
|
|
CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
|
2015-09-10 16:12:02 +08:00
|
|
|
llvm::SmallVector<llvm::Value *, 16> RealArgs;
|
|
|
|
RealArgs.append(std::begin(Args), std::end(Args));
|
|
|
|
RealArgs.append(CapturedVars.begin(), CapturedVars.end());
|
|
|
|
|
2016-03-28 20:58:34 +08:00
|
|
|
auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
|
2015-09-10 16:12:02 +08:00
|
|
|
CGF.EmitRuntimeCall(RTLFn, RealArgs);
|
|
|
|
};
|
2016-03-28 20:58:34 +08:00
|
|
|
auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
|
|
|
|
Loc](CodeGenFunction &CGF) {
|
|
|
|
auto ThreadID = getThreadID(CGF, Loc);
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
// Build calls:
|
|
|
|
// __kmpc_serialized_parallel(&Loc, GTid);
|
|
|
|
llvm::Value *Args[] = {RTLoc, ThreadID};
|
2016-03-28 20:58:34 +08:00
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
|
|
|
|
Args);
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
|
|
|
|
// OutlinedFn(&GTid, &zero, CapturedStruct);
|
2016-03-28 20:58:34 +08:00
|
|
|
auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address ZeroAddr =
|
2016-03-28 20:58:34 +08:00
|
|
|
CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
|
|
|
|
/*Name*/ ".zero.addr");
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
2015-09-10 16:12:02 +08:00
|
|
|
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
|
|
|
|
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
|
|
|
|
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
|
|
|
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
|
|
|
|
|
|
|
|
// __kmpc_end_serialized_parallel(&Loc, GTid);
|
2016-03-28 20:58:34 +08:00
|
|
|
llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
CGF.EmitRuntimeCall(
|
2016-03-28 20:58:34 +08:00
|
|
|
createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
};
|
2016-03-28 20:58:34 +08:00
|
|
|
if (IfCond) {
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
|
2016-03-28 20:58:34 +08:00
|
|
|
} else {
|
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
ThenGen(CGF);
|
2016-03-28 20:58:34 +08:00
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
}
|
|
|
|
|
2014-10-27 16:08:18 +08:00
|
|
|
// If we're inside an (outlined) parallel region, use the region info's
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
// thread-ID variable (it is passed in a first argument of the outlined function
|
|
|
|
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
|
|
|
|
// regular serial code region, get thread ID by calling kmp_int32
|
|
|
|
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
|
|
|
|
// return the address of that temp.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
|
|
|
|
SourceLocation Loc) {
|
2016-01-22 16:56:50 +08:00
|
|
|
if (auto *OMPRegionInfo =
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
|
2015-02-26 18:27:34 +08:00
|
|
|
if (OMPRegionInfo->getThreadIDVariable())
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
|
2015-02-26 18:27:34 +08:00
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
auto ThreadID = getThreadID(CGF, Loc);
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If the condition evaluates to true, the code executes the parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc is the debug location, 1 is the number of additional parameters after the "microtask" argument, microtask is the outlined function for the code associated with the 'parallel' directive, and captured_struct is the list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc is the debug location; global_thread_id is the global thread id, returned by the __kmpc_global_thread_num() call or passed as the first parameter in the microtask() call; global_thread_id.addr is the address of the variable in which the global_thread_id value is stored; zero.addr is the implicit bound thread id (should be set to 0 for a serial call); microtask() and captured_struct are the same as in the parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
auto Int32Ty =
|
|
|
|
CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
|
|
|
|
auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
|
|
|
|
CGF.EmitStoreOfScalar(ThreadID,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If the condition evaluates to true, the code executes the parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc is the debug location, 1 is the number of additional parameters after the "microtask" argument, microtask is the outlined function for the code associated with the 'parallel' directive, and captured_struct is the list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc is the debug location; global_thread_id is the global thread id, returned by the __kmpc_global_thread_num() call or passed as the first parameter in the microtask() call; global_thread_id.addr is the address of the variable in which the global_thread_id value is stored; zero.addr is the implicit bound thread id (should be set to 0 for a serial call); microtask() and captured_struct are the same as in the parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
|
|
|
|
return ThreadIDTemp;
|
|
|
|
}
|
|
|
|
|
2014-11-11 12:05:39 +08:00
|
|
|
/// Returns the internal variable registered under \p Name in InternalVars,
/// creating it on first use.
///
/// When no variable is cached for the name, a new llvm::GlobalVariable of
/// type \p Ty is created in the current module with common linkage and a
/// null initializer, stored in the map entry, and returned.
/// \param Ty   Value type of the variable; on a cache hit it is asserted to
///             match the pointee type of the cached variable.
/// \param Name Name used both as the map key and as the global's name.
llvm::Constant *
|
2015-02-25 16:32:46 +08:00
|
|
|
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
|
2014-11-11 12:05:39 +08:00
|
|
|
const llvm::Twine &Name) {
|
2014-09-22 18:01:53 +08:00
|
|
|
SmallString<256> Buffer;
|
|
|
|
llvm::raw_svector_ostream Out(Buffer);
|
2014-11-11 12:05:39 +08:00
|
|
|
// Render the Twine into Buffer so the name is available as one string.
Out << Name;
|
|
|
|
auto RuntimeName = Out.str();
|
2014-11-19 11:06:06 +08:00
|
|
|
// Fetch the map entry for RuntimeName; a null value is supplied for the
// case where the entry has to be added.
auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
|
|
|
|
// A non-null value means the variable was created by an earlier call.
if (Elem.second) {
|
|
|
|
assert(Elem.second->getType()->getPointerElementType() == Ty &&
|
2014-11-11 12:05:39 +08:00
|
|
|
"OMP internal variable has different type than requested");
|
2014-11-19 11:06:06 +08:00
|
|
|
return &*Elem.second;
|
2014-11-11 12:05:39 +08:00
|
|
|
}
|
|
|
|
|
2014-11-19 11:06:06 +08:00
|
|
|
// First request: create the global, cache it in the entry and return it.
return Elem.second = new llvm::GlobalVariable(
|
|
|
|
CGM.getModule(), Ty, /*IsConstant*/ false,
|
|
|
|
llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
|
|
|
|
Elem.first());
|
2014-11-11 12:05:39 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// Returns the internal variable used as the lock for the critical region
/// named \p CriticalName.
///
/// The variable has type KmpCriticalNameTy and is named
/// ".gomp_critical_user_" + CriticalName + ".var"; it is obtained through
/// getOrCreateInternalVariable().
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
|
2014-11-11 12:05:39 +08:00
|
|
|
llvm::Twine Name(".gomp_critical_user_", CriticalName);
|
2015-02-25 16:32:46 +08:00
|
|
|
return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
|
2014-09-22 18:01:53 +08:00
|
|
|
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
namespace {
|
2016-03-28 20:58:34 +08:00
|
|
|
/// Cleanup that, when emitted, calls \c Callee with the \c N arguments
/// captured at construction time.
/// \tparam N Number of arguments passed to the call.
template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
|
|
|
|
/// Function to call when the cleanup is emitted.
llvm::Value *Callee;
|
|
|
|
/// Copy of the arguments to pass to Callee.
llvm::Value *Args[N];
|
2015-04-10 12:50:10 +08:00
|
|
|
|
|
|
|
public:
|
2016-03-28 20:58:34 +08:00
|
|
|
/// Stores \p Callee and copies \p CleanupArgs into the cleanup.
/// \p CleanupArgs is asserted to hold exactly N values.
CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
|
|
|
|
: Callee(Callee) {
|
|
|
|
assert(CleanupArgs.size() == N);
|
|
|
|
std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
|
2016-03-28 20:52:58 +08:00
|
|
|
}
|
2016-03-28 20:58:34 +08:00
|
|
|
|
|
|
|
/// Emits the runtime call to Callee with the stored arguments.
/// Nothing is emitted when \p CGF has no insert point.
void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
|
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
|
|
|
CGF.EmitRuntimeCall(Callee, Args);
|
2015-04-10 15:48:12 +08:00
|
|
|
}
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
2015-09-11 01:07:54 +08:00
|
|
|
} // anonymous namespace
|
2015-04-10 12:50:10 +08:00
|
|
|
|
|
|
|
/// Emits the code for a 'critical' region.
///
/// Emits a call to __kmpc_critical (or __kmpc_critical_with_hint when
/// \p Hint is non-null), registers a call to __kmpc_end_critical as a
/// NormalAndEHCleanup, and then emits the region body produced by
/// \p CriticalOpGen as an inlined OMPD_critical directive.
/// Does nothing when \p CGF has no insert point.
/// \param CriticalName Name used to select the lock variable via
///                     getCriticalRegionLock().
/// \param CriticalOpGen Generator for the body of the region.
/// \param Loc  Source location used for the location and thread-id arguments.
/// \param Hint Optional hint expression; may be null.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
|
|
|
|
StringRef CriticalName,
|
|
|
|
const RegionCodeGenTy &CriticalOpGen,
|
2015-12-15 18:55:09 +08:00
|
|
|
SourceLocation Loc, const Expr *Hint) {
|
|
|
|
// __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
|
2014-12-01 19:32:38 +08:00
|
|
|
// CriticalOpGen();
|
|
|
|
// __kmpc_end_critical(ident_t *, gtid, Lock);
|
|
|
|
// Prepare arguments and build a call to __kmpc_critical
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2016-03-28 20:58:34 +08:00
|
|
|
// NOTE(review): presumably this scope runs the cleanup pushed below when
// it is destroyed at the end of the function; confirm against
// RunCleanupsScope.
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
2015-12-15 18:55:09 +08:00
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
|
|
|
|
getCriticalRegionLock(CriticalName)};
|
|
|
|
if (Hint) {
|
2016-03-28 20:58:34 +08:00
|
|
|
// The hinted entry point takes the three base arguments plus the hint.
llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args),
|
|
|
|
std::end(Args));
|
|
|
|
auto *HintVal = CGF.EmitScalarExpr(Hint);
|
|
|
|
// The hint value is cast, as unsigned, to CGM.IntPtrTy.
ArgsWithHint.push_back(
|
|
|
|
CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false));
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint),
|
|
|
|
ArgsWithHint);
|
|
|
|
} else
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
|
|
|
|
// Build a call to __kmpc_end_critical
|
|
|
|
// The end call receives only the three base arguments, never the hint.
CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
|
|
|
|
NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
|
|
|
|
llvm::makeArrayRef(Args));
|
2015-12-15 18:55:09 +08:00
|
|
|
emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
|
2014-09-22 18:01:53 +08:00
|
|
|
}
|
2014-10-08 22:01:46 +08:00
|
|
|
|
2016-03-28 20:58:34 +08:00
|
|
|
/// Emits "if (IfCond) { body }" around an inlined OpenMP directive.
///
/// \p IfCond is converted from a signed 32-bit integer type to bool, then a
/// conditional branch selects between the "omp_if.then" block, where the
/// body from \p BodyOpGen is emitted as an inlined directive of kind
/// \p Kind, and the "omp_if.end" continuation block.
/// \param IfCond    Condition value; treated as a signed 32-bit integer.
/// \param Kind      Directive kind passed to emitInlinedDirective().
/// \param Loc       Source location used for the scalar conversion.
/// \param BodyOpGen Generator for the guarded body.
static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
|
|
|
|
OpenMPDirectiveKind Kind, SourceLocation Loc,
|
|
|
|
const RegionCodeGenTy &BodyOpGen) {
|
|
|
|
// Convert the 32-bit integer condition to a boolean value.
llvm::Value *CallBool = CGF.EmitScalarConversion(
|
|
|
|
IfCond,
|
|
|
|
CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
|
|
|
|
CGF.getContext().BoolTy, Loc);
|
|
|
|
|
|
|
|
auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
|
|
|
|
auto *ContBlock = CGF.createBasicBlock("omp_if.end");
|
|
|
|
// Generate the branch (If-stmt)
|
|
|
|
CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
|
|
|
|
CGF.EmitBlock(ThenBlock);
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
|
|
|
|
// Emit the rest of bblocks/branches
|
|
|
|
CGF.EmitBranch(ContBlock);
|
|
|
|
CGF.EmitBlock(ContBlock, true);
|
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// Emits the code for a 'master' region.
///
/// Emits a call to __kmpc_master and passes its result to emitIfStmt() as
/// the condition guarding the region. Inside the guarded code a call to
/// __kmpc_end_master is registered as a NormalAndEHCleanup before the body
/// from \p MasterOpGen is emitted.
/// Does nothing when \p CGF has no insert point.
/// \param MasterOpGen Generator for the body of the region.
/// \param Loc Source location used for the location and thread-id arguments.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
|
2015-04-10 12:50:10 +08:00
|
|
|
const RegionCodeGenTy &MasterOpGen,
|
2015-02-25 16:32:46 +08:00
|
|
|
SourceLocation Loc) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2014-12-04 15:23:53 +08:00
|
|
|
// if(__kmpc_master(ident_t *, gtid)) {
|
|
|
|
// MasterOpGen();
|
|
|
|
// __kmpc_end_master(ident_t *, gtid);
|
|
|
|
// }
|
|
|
|
// Prepare arguments and build a call to __kmpc_master
|
2015-04-10 14:33:45 +08:00
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
|
2016-03-28 20:58:34 +08:00
|
|
|
auto *IsMaster =
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
|
|
|
|
// Cleanup type sized to the number of entries in Args.
typedef CallEndCleanup<std::extent<decltype(Args)>::value>
|
|
|
|
MasterCallEndCleanup;
|
|
|
|
emitIfStmt(
|
|
|
|
CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
|
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
|
|
|
// __kmpc_end_master receives the same (loc, gtid) arguments.
CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
|
|
|
|
NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
|
|
|
|
llvm::makeArrayRef(Args));
|
|
|
|
MasterOpGen(CGF);
|
|
|
|
});
|
2014-12-04 15:23:53 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// Emits a call to __kmpc_omp_taskyield(loc, thread_id, 0).
///
/// The third argument is the constant 0 of type CGM.IntTy.
/// Does nothing when \p CGF has no insert point.
/// \param Loc Source location used for the location and thread-id arguments.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
|
|
|
|
SourceLocation Loc) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-02-05 13:57:51 +08:00
|
|
|
// Build call __kmpc_omp_taskyield(loc, thread_id, 0);
|
|
|
|
llvm::Value *Args[] = {
|
2015-02-25 16:32:46 +08:00
|
|
|
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
|
2015-02-05 13:57:51 +08:00
|
|
|
llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
|
2015-02-25 16:32:46 +08:00
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
|
2015-02-05 13:57:51 +08:00
|
|
|
}
|
|
|
|
|
2015-06-18 20:14:09 +08:00
|
|
|
/// Emits the code for a 'taskgroup' region.
///
/// Emits a call to __kmpc_taskgroup, registers a call to
/// __kmpc_end_taskgroup as a NormalAndEHCleanup, and then emits the body
/// from \p TaskgroupOpGen as an inlined OMPD_taskgroup directive.
/// Does nothing when \p CGF has no insert point.
/// \param TaskgroupOpGen Generator for the body of the region.
/// \param Loc Source location used for the location and thread-id arguments.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
|
|
|
|
const RegionCodeGenTy &TaskgroupOpGen,
|
|
|
|
SourceLocation Loc) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-06-18 20:14:09 +08:00
|
|
|
// __kmpc_taskgroup(ident_t *, gtid);
|
|
|
|
// TaskgroupOpGen();
|
|
|
|
// __kmpc_end_taskgroup(ident_t *, gtid);
|
|
|
|
// Prepare arguments and build a call to __kmpc_taskgroup
|
2016-03-28 20:58:34 +08:00
|
|
|
// The nested block bounds the lifetime of the cleanups scope below.
{
|
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
|
|
|
|
// Build a call to __kmpc_end_taskgroup
|
|
|
|
CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
|
|
|
|
NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
|
|
|
|
llvm::makeArrayRef(Args));
|
|
|
|
emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
|
|
|
|
}
|
2015-06-18 20:14:09 +08:00
|
|
|
}
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
/// Given an array of pointers to variables, project the address of a
|
|
|
|
/// given variable.
///
/// Loads the pointer stored in slot \p Index of \p Array and returns it as
/// an Address whose alignment is the declared alignment of \p Var and whose
/// element type is the memory type of \p Var.
/// \param Array Address of the array of pointers.
/// \param Index Slot in \p Array that holds the pointer for \p Var.
/// \param Var   Declaration that supplies the alignment and element type.
|
2015-10-08 17:10:53 +08:00
|
|
|
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
|
|
|
|
unsigned Index, const VarDecl *Var) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
// Pull out the pointer to the variable.
|
|
|
|
Address PtrAddr =
|
2015-10-08 17:10:53 +08:00
|
|
|
CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
|
|
|
|
|
|
|
|
Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
|
2015-10-08 17:10:53 +08:00
|
|
|
Addr = CGF.Builder.CreateElementBitCast(
|
|
|
|
Addr, CGF.ConvertTypeForMem(Var->getType()));
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
return Addr;
|
|
|
|
}
|
|
|
|
|
2015-03-23 14:18:07 +08:00
|
|
|
static llvm::Value *emitCopyprivateCopyFunction(
|
2015-04-14 13:11:24 +08:00
|
|
|
CodeGenModule &CGM, llvm::Type *ArgsType,
|
|
|
|
ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
|
|
|
|
ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
|
2015-03-23 14:18:07 +08:00
|
|
|
auto &C = CGM.getContext();
|
|
|
|
// void copy_func(void *LHSArg, void *RHSArg);
|
|
|
|
FunctionArgList Args;
|
|
|
|
ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
|
|
|
|
C.VoidPtrTy);
|
|
|
|
ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
|
|
|
|
C.VoidPtrTy);
|
|
|
|
Args.push_back(&LHSArg);
|
|
|
|
Args.push_back(&RHSArg);
|
2016-03-11 12:30:31 +08:00
|
|
|
auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
|
2015-03-23 14:18:07 +08:00
|
|
|
auto *Fn = llvm::Function::Create(
|
|
|
|
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
|
|
|
|
".omp.copyprivate.copy_func", &CGM.getModule());
|
2015-10-28 10:30:47 +08:00
|
|
|
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
|
2015-03-23 14:18:07 +08:00
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
|
2015-04-14 13:11:24 +08:00
|
|
|
// Dest = (void*[n])(LHSArg);
|
2015-03-23 14:18:07 +08:00
|
|
|
// Src = (void*[n])(RHSArg);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
|
|
|
|
ArgsType), CGF.getPointerAlign());
|
|
|
|
Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
|
|
|
|
ArgsType), CGF.getPointerAlign());
|
2015-03-23 14:18:07 +08:00
|
|
|
// *(Type0*)Dst[0] = *(Type0*)Src[0];
|
|
|
|
// *(Type1*)Dst[1] = *(Type1*)Src[1];
|
|
|
|
// ...
|
|
|
|
// *(Typen*)Dst[n] = *(Typen*)Src[n];
|
|
|
|
for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
|
|
|
|
Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
|
|
|
|
|
|
|
|
auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
|
|
|
|
Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
|
|
|
|
|
2015-05-19 20:31:28 +08:00
|
|
|
auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
|
|
|
|
QualType Type = VD->getType();
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
|
2015-03-23 14:18:07 +08:00
|
|
|
}
|
|
|
|
CGF.FinishFunction();
|
|
|
|
return Fn;
|
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
|
2015-04-10 12:50:10 +08:00
|
|
|
const RegionCodeGenTy &SingleOpGen,
|
2015-03-23 14:18:07 +08:00
|
|
|
SourceLocation Loc,
|
|
|
|
ArrayRef<const Expr *> CopyprivateVars,
|
|
|
|
ArrayRef<const Expr *> SrcExprs,
|
|
|
|
ArrayRef<const Expr *> DstExprs,
|
|
|
|
ArrayRef<const Expr *> AssignmentOps) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-03-23 14:18:07 +08:00
|
|
|
assert(CopyprivateVars.size() == SrcExprs.size() &&
|
|
|
|
CopyprivateVars.size() == DstExprs.size() &&
|
|
|
|
CopyprivateVars.size() == AssignmentOps.size());
|
|
|
|
auto &C = CGM.getContext();
|
|
|
|
// int32 did_it = 0;
|
2015-02-05 14:35:41 +08:00
|
|
|
// if(__kmpc_single(ident_t *, gtid)) {
|
|
|
|
// SingleOpGen();
|
|
|
|
// __kmpc_end_single(ident_t *, gtid);
|
2015-03-23 14:18:07 +08:00
|
|
|
// did_it = 1;
|
2015-02-05 14:35:41 +08:00
|
|
|
// }
|
2015-03-23 14:18:07 +08:00
|
|
|
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
|
|
|
|
// <copy_func>, did_it);
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address DidIt = Address::invalid();
|
2015-03-23 14:18:07 +08:00
|
|
|
if (!CopyprivateVars.empty()) {
|
|
|
|
// int32 did_it = 0;
|
|
|
|
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
|
|
|
|
DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
|
2015-03-23 14:18:07 +08:00
|
|
|
}
|
2015-02-05 14:35:41 +08:00
|
|
|
// Prepare arguments and build a call to __kmpc_single
|
2015-04-10 14:33:45 +08:00
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
|
2016-03-28 20:58:34 +08:00
|
|
|
auto *IsSingle =
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
|
|
|
|
typedef CallEndCleanup<std::extent<decltype(Args)>::value>
|
|
|
|
SingleCallEndCleanup;
|
|
|
|
emitIfStmt(
|
|
|
|
CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
|
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
|
|
|
CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
|
|
|
|
NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
|
|
|
|
llvm::makeArrayRef(Args));
|
|
|
|
SingleOpGen(CGF);
|
|
|
|
if (DidIt.isValid()) {
|
|
|
|
// did_it = 1;
|
|
|
|
CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
|
|
|
|
}
|
|
|
|
});
|
2015-03-23 14:18:07 +08:00
|
|
|
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
|
|
|
|
// <copy_func>, did_it);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
if (DidIt.isValid()) {
|
2015-03-23 14:18:07 +08:00
|
|
|
llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
|
|
|
|
auto CopyprivateArrayTy =
|
|
|
|
C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
|
|
|
|
/*IndexTypeQuals=*/0);
|
|
|
|
// Create a list of all private variables for copyprivate.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address CopyprivateList =
|
2015-03-23 14:18:07 +08:00
|
|
|
CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
|
|
|
|
for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address Elem = CGF.Builder.CreateConstArrayGEP(
|
|
|
|
CopyprivateList, I, CGF.getPointerSize());
|
|
|
|
CGF.Builder.CreateStore(
|
2015-03-23 14:18:07 +08:00
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
|
|
|
|
Elem);
|
2015-03-23 14:18:07 +08:00
|
|
|
}
|
|
|
|
// Build function that copies private values from single region to all other
|
|
|
|
// threads in the corresponding parallel region.
|
|
|
|
auto *CpyFn = emitCopyprivateCopyFunction(
|
|
|
|
CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
|
2015-04-14 13:11:24 +08:00
|
|
|
CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
|
2016-01-26 20:20:39 +08:00
|
|
|
auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address CL =
|
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
|
|
|
|
CGF.VoidPtrTy);
|
|
|
|
auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
|
2015-03-23 14:18:07 +08:00
|
|
|
llvm::Value *Args[] = {
|
|
|
|
emitUpdateLocation(CGF, Loc), // ident_t *<loc>
|
|
|
|
getThreadID(CGF, Loc), // i32 <gtid>
|
2015-04-30 11:47:32 +08:00
|
|
|
BufSize, // size_t <buf_size>
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CL.getPointer(), // void *<copyprivate list>
|
2015-03-23 14:18:07 +08:00
|
|
|
CpyFn, // void (*) (void *, void *) <copy_func>
|
|
|
|
DidItVal // i32 did_it
|
|
|
|
};
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
|
|
|
|
}
|
2015-02-05 14:35:41 +08:00
|
|
|
}
|
|
|
|
|
2015-04-22 19:15:40 +08:00
|
|
|
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
|
|
|
|
const RegionCodeGenTy &OrderedOpGen,
|
2015-09-29 11:48:57 +08:00
|
|
|
SourceLocation Loc, bool IsThreads) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-04-22 19:15:40 +08:00
|
|
|
// __kmpc_ordered(ident_t *, gtid);
|
|
|
|
// OrderedOpGen();
|
|
|
|
// __kmpc_end_ordered(ident_t *, gtid);
|
|
|
|
// Prepare arguments and build a call to __kmpc_ordered
|
2016-03-28 20:58:34 +08:00
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
2015-09-29 11:48:57 +08:00
|
|
|
if (IsThreads) {
|
2015-04-22 19:15:40 +08:00
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
|
2016-03-28 20:58:34 +08:00
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
|
|
|
|
// Build a call to __kmpc_end_ordered
|
|
|
|
CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
|
|
|
|
NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
|
|
|
|
llvm::makeArrayRef(Args));
|
2015-04-22 19:15:40 +08:00
|
|
|
}
|
2015-09-29 11:48:57 +08:00
|
|
|
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
|
2015-04-22 19:15:40 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
|
2015-09-15 20:52:43 +08:00
|
|
|
OpenMPDirectiveKind Kind, bool EmitChecks,
|
|
|
|
bool ForceSimpleCall) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2014-12-05 12:09:23 +08:00
|
|
|
// Build call __kmpc_cancel_barrier(loc, thread_id);
|
2015-07-03 17:56:58 +08:00
|
|
|
// Build call __kmpc_barrier(loc, thread_id);
|
2016-02-19 18:38:26 +08:00
|
|
|
unsigned Flags;
|
|
|
|
if (Kind == OMPD_for)
|
|
|
|
Flags = OMP_IDENT_BARRIER_IMPL_FOR;
|
|
|
|
else if (Kind == OMPD_sections)
|
|
|
|
Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
|
|
|
|
else if (Kind == OMPD_single)
|
|
|
|
Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
|
|
|
|
else if (Kind == OMPD_barrier)
|
|
|
|
Flags = OMP_IDENT_BARRIER_EXPL;
|
|
|
|
else
|
|
|
|
Flags = OMP_IDENT_BARRIER_IMPL;
|
2015-07-03 17:56:58 +08:00
|
|
|
// Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
|
|
|
|
// thread_id);
|
2015-02-25 16:32:46 +08:00
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
|
|
|
|
getThreadID(CGF, Loc)};
|
2016-01-22 16:56:50 +08:00
|
|
|
if (auto *OMPRegionInfo =
|
|
|
|
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
|
2015-09-15 20:52:43 +08:00
|
|
|
if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
|
2015-07-03 17:56:58 +08:00
|
|
|
auto *Result = CGF.EmitRuntimeCall(
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
|
2015-09-15 20:52:43 +08:00
|
|
|
if (EmitChecks) {
|
2015-07-03 17:56:58 +08:00
|
|
|
// if (__kmpc_cancel_barrier()) {
|
|
|
|
// exit from construct;
|
|
|
|
// }
|
|
|
|
auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
|
|
|
|
auto *ContBB = CGF.createBasicBlock(".cancel.continue");
|
|
|
|
auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
|
|
|
|
CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
|
|
|
|
CGF.EmitBlock(ExitBB);
|
|
|
|
// exit from construct;
|
2015-09-15 20:52:43 +08:00
|
|
|
auto CancelDestination =
|
|
|
|
CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
|
2015-07-03 17:56:58 +08:00
|
|
|
CGF.EmitBranchThroughCleanup(CancelDestination);
|
|
|
|
CGF.EmitBlock(ContBB, /*IsFinished=*/true);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
|
2014-10-08 22:01:46 +08:00
|
|
|
}
|
|
|
|
|
2014-12-15 15:07:06 +08:00
|
|
|
/// \brief Map the OpenMP loop schedule to the runtime enumeration.
|
|
|
|
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
|
2015-05-20 21:12:48 +08:00
|
|
|
bool Chunked, bool Ordered) {
|
2014-12-15 15:07:06 +08:00
|
|
|
switch (ScheduleKind) {
|
|
|
|
case OMPC_SCHEDULE_static:
|
2015-05-20 21:12:48 +08:00
|
|
|
return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
|
|
|
|
: (Ordered ? OMP_ord_static : OMP_sch_static);
|
2014-12-15 15:07:06 +08:00
|
|
|
case OMPC_SCHEDULE_dynamic:
|
2015-05-20 21:12:48 +08:00
|
|
|
return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
|
2014-12-15 15:07:06 +08:00
|
|
|
case OMPC_SCHEDULE_guided:
|
2015-05-20 21:12:48 +08:00
|
|
|
return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
|
2014-12-15 15:07:06 +08:00
|
|
|
case OMPC_SCHEDULE_runtime:
|
2015-05-20 21:12:48 +08:00
|
|
|
return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
|
|
|
|
case OMPC_SCHEDULE_auto:
|
|
|
|
return Ordered ? OMP_ord_auto : OMP_sch_auto;
|
2014-12-15 15:07:06 +08:00
|
|
|
case OMPC_SCHEDULE_unknown:
|
|
|
|
assert(!Chunked && "chunk was specified but schedule kind not known");
|
2015-05-20 21:12:48 +08:00
|
|
|
return Ordered ? OMP_ord_static : OMP_sch_static;
|
2014-12-15 15:07:06 +08:00
|
|
|
}
|
|
|
|
llvm_unreachable("Unexpected runtime schedule");
|
|
|
|
}
|
|
|
|
|
2016-03-08 00:04:49 +08:00
|
|
|
/// \brief Map the OpenMP distribute schedule to the runtime enumeration.
|
|
|
|
static OpenMPSchedType
|
|
|
|
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
|
|
|
|
// only static is allowed for dist_schedule
|
|
|
|
return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
|
|
|
|
}
|
|
|
|
|
2014-12-15 15:07:06 +08:00
|
|
|
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  // True only when the clause maps to the unordered, non-chunked static
  // runtime schedule.
  return getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false) ==
         OMP_sch_static;
}
|
|
|
|
|
2016-03-08 00:04:49 +08:00
|
|
|
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  // True only when the dist_schedule clause maps to the non-chunked static
  // distribute schedule.
  return getRuntimeSchedule(ScheduleKind, Chunked) == OMP_dist_sch_static;
}
|
|
|
|
|
|
|
|
|
2015-01-22 16:49:35 +08:00
|
|
|
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  // Query the unordered, non-chunked mapping; every result other than plain
  // static is reported as dynamic.
  const OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  const bool IsPlainStatic = Schedule == OMP_sch_static;
  return !IsPlainStatic;
}
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
|
|
|
|
SourceLocation Loc,
|
|
|
|
OpenMPScheduleClauseKind ScheduleKind,
|
|
|
|
unsigned IVSize, bool IVSigned,
|
|
|
|
bool Ordered, llvm::Value *UB,
|
|
|
|
llvm::Value *Chunk) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-05-20 21:12:48 +08:00
|
|
|
OpenMPSchedType Schedule =
|
|
|
|
getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
assert(Ordered ||
|
|
|
|
(Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
|
|
|
|
Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
|
|
|
|
// Call __kmpc_dispatch_init(
|
|
|
|
// ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
|
|
|
|
// kmp_int[32|64] lower, kmp_int[32|64] upper,
|
|
|
|
// kmp_int[32|64] stride, kmp_int[32|64] chunk);
|
|
|
|
|
|
|
|
// If the Chunk was not specified in the clause - use default value 1.
|
|
|
|
if (Chunk == nullptr)
|
|
|
|
Chunk = CGF.Builder.getIntN(IVSize, 1);
|
|
|
|
llvm::Value *Args[] = {
|
2016-02-19 18:38:26 +08:00
|
|
|
emitUpdateLocation(CGF, Loc),
|
|
|
|
getThreadID(CGF, Loc),
|
|
|
|
CGF.Builder.getInt32(Schedule), // Schedule type
|
|
|
|
CGF.Builder.getIntN(IVSize, 0), // Lower
|
|
|
|
UB, // Upper
|
|
|
|
CGF.Builder.getIntN(IVSize, 1), // Stride
|
|
|
|
Chunk // Chunk
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
};
|
|
|
|
CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
|
|
|
|
}
|
2015-03-12 21:37:50 +08:00
|
|
|
|
2016-03-08 00:04:49 +08:00
|
|
|
// Emits the call to the static-init runtime entry point passed in
// ForStaticInitFunction:
//   __kmpc_for_static_init(
//       ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
//       kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
//       kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
//       kmp_int[32|64] incr, kmp_int[32|64] chunk);
// Loc and IVSigned are accepted but not used by this helper.
static void emitForStaticInitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                  llvm::Value *UpdateLocation,
                                  llvm::Value *ThreadId,
                                  llvm::Constant *ForStaticInitFunction,
                                  OpenMPSchedType Schedule, unsigned IVSize,
                                  bool IVSigned, bool Ordered, Address IL,
                                  Address LB, Address UB, Address ST,
                                  llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  if (Chunk) {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  } else {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(IVSize, 1);
  }

  llvm::Value *SchedTypeVal = CGF.Builder.getInt32(Schedule);
  llvm::Value *IncrVal = CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      UpdateLocation,  // ident_t *loc
      ThreadId,        // kmp_int32 tid
      SchedTypeVal,    // Schedule type
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &LB
      UB.getPointer(), // &UB
      ST.getPointer(), // &Stride
      IncrVal,         // Incr
      Chunk            // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
// Emits the static-init runtime call for a loop with a 'schedule' clause.
// The clause kind, the presence of Chunk and the Ordered flag select the
// runtime schedule; IL, LB, UB and ST are the addresses handed to the runtime
// for the last-iteration flag, lower bound, upper bound and stride.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPScheduleClauseKind ScheduleKind,
                                        unsigned IVSize, bool IVSigned,
                                        bool Ordered, Address IL, Address LB,
                                        Address UB, Address ST,
                                        llvm::Value *Chunk) {
  // Bail out before touching the module when there is nowhere to emit code.
  // The helper below performs the same check, but only after the location,
  // thread id and runtime function have already been requested; checking here
  // keeps this entry point in line with the other emitters in this file.
  if (!CGF.HaveInsertPoint())
    return;

  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
  emitForStaticInitCall(CGF, Loc, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, IVSize, IVSigned, Ordered, IL, LB, UB, ST,
                        Chunk);
}
|
|
|
|
|
|
|
|
// Emits the static-init runtime call for a 'distribute' loop. Only the
// presence of Chunk selects between the chunked and non-chunked distribute
// schedule; IL, LB, UB and ST are the addresses handed to the runtime for the
// last-iteration flag, lower bound, upper bound and stride.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
    bool Ordered, Address IL, Address LB, Address UB, Address ST,
    llvm::Value *Chunk) {
  // Bail out before touching the module when there is nowhere to emit code.
  // The helper below performs the same check, but only after the location,
  // thread id and runtime function have already been requested; checking here
  // keeps this entry point in line with the other emitters in this file.
  if (!CGF.HaveInsertPoint())
    return;

  OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
  emitForStaticInitCall(CGF, Loc, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, IVSize, IVSigned, Ordered, IL, LB, UB, ST,
                        Chunk);
}
|
|
|
|
|
2015-04-22 19:15:40 +08:00
|
|
|
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  // Emits: __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *IdentLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = getThreadID(CGF, Loc);
  llvm::Value *FiniArgs[] = {IdentLoc, GTid};
  auto *StaticFini = createRuntimeFunction(OMPRTL__kmpc_for_static_fini);
  CGF.EmitRuntimeCall(StaticFini, FiniArgs);
}
|
|
|
|
|
2015-05-20 21:12:48 +08:00
|
|
|
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  // Emits: __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  // The concrete entry point is picked from IVSize and IVSigned.
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *IdentLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = getThreadID(CGF, Loc);
  llvm::Value *FiniArgs[] = {IdentLoc, GTid};
  auto *DispatchFini = createDispatchFiniFunction(IVSize, IVSigned);
  CGF.EmitRuntimeCall(DispatchFini, FiniArgs);
}
|
|
|
|
|
2015-03-12 21:37:50 +08:00
|
|
|
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
|
|
|
|
SourceLocation Loc, unsigned IVSize,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
bool IVSigned, Address IL,
|
|
|
|
Address LB, Address UB,
|
|
|
|
Address ST) {
|
2015-03-12 21:37:50 +08:00
|
|
|
// Call __kmpc_dispatch_next(
|
|
|
|
// ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
|
|
|
|
// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
|
|
|
|
// kmp_int[32|64] *p_stride);
|
|
|
|
llvm::Value *Args[] = {
|
2016-02-19 18:38:26 +08:00
|
|
|
emitUpdateLocation(CGF, Loc),
|
|
|
|
getThreadID(CGF, Loc),
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
IL.getPointer(), // &isLastIter
|
|
|
|
LB.getPointer(), // &Lower
|
|
|
|
UB.getPointer(), // &Upper
|
|
|
|
ST.getPointer() // &Stride
|
2015-03-12 21:37:50 +08:00
|
|
|
};
|
|
|
|
llvm::Value *Call =
|
|
|
|
CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
|
|
|
|
return CGF.EmitScalarConversion(
|
|
|
|
Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
|
2015-08-11 12:19:28 +08:00
|
|
|
CGF.getContext().BoolTy, Loc);
|
2015-03-12 21:37:50 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// \brief Emit the runtime call for a 'num_threads' clause:
///   __kmpc_push_num_threads(&loc, global_tid, num_threads)
/// \a NumThreads is cast to a signed 32-bit integer before being passed.
/// Nothing is emitted when \a CGF has no insertion point.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (CGF.HaveInsertPoint()) {
    // The arguments are materialized in call order: location, thread id and
    // finally the requested thread count.
    llvm::Value *LocArg = emitUpdateLocation(CGF, Loc);
    llvm::Value *ThreadIDArg = getThreadID(CGF, Loc);
    llvm::Value *NumThreadsArg =
        CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true);
    llvm::Value *CallArgs[] = {LocArg, ThreadIDArg, NumThreadsArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                        CallArgs);
  }
}
|
|
|
|
|
2015-06-18 21:40:03 +08:00
|
|
|
/// \brief Emit the runtime call for a 'proc_bind' clause:
///   __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
/// The clause kind \a ProcBind is translated to the numeric constant passed
/// to the runtime. Nothing is emitted when \a CGF has no insertion point.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  // Proc bind constants accepted by the runtime.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue = 1,
    ProcBindMaster = 2,
    ProcBindClose = 3,
    ProcBindSpread = 4,
    ProcBindIntel = 5,
    ProcBindDefault = 6
  };

  // Translate the clause kind; only master, close and spread are expected.
  ProcBindTy RuntimeValue;
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeValue = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeValue = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeValue = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }

  // Arguments are materialized in call order.
  llvm::Value *LocArg = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadIDArg = getThreadID(CGF, Loc);
  llvm::Value *ProcBindArg =
      llvm::ConstantInt::get(CGM.IntTy, RuntimeValue, /*isSigned=*/true);
  llvm::Value *CallArgs[] = {LocArg, ThreadIDArg, ProcBindArg};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind),
                      CallArgs);
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// \brief Emit a call to 'void __kmpc_flush(ident_t *loc)'.
/// The list of flushed expressions is accepted but not used; only \a Loc is
/// forwarded to the runtime. Nothing is emitted when \a CGF has no insertion
/// point.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (CGF.HaveInsertPoint()) {
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}
|
2015-02-25 16:32:46 +08:00
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to the destructors function to the destructors field of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
namespace {
/// \brief Field positions within the kmp_task_t type.
enum KmpTaskTFields {
  /// \brief The list of shared variables.
  KmpTaskTShareds = 0,
  /// \brief The task routine.
  KmpTaskTRoutine = 1,
  /// \brief The partition id used for untied tasks.
  KmpTaskTPartId = 2,
  /// \brief The function that calls destructors for private variables.
  KmpTaskTDestructors = 3,
};
} // anonymous namespace
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to the destructors function to the destructors field of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
|
|
|
|
// FIXME: Add other entries type when they become supported.
|
|
|
|
return OffloadEntriesTargetRegion.empty();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Initialize target region entry.
|
|
|
|
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
|
|
|
|
initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
|
|
|
|
StringRef ParentName, unsigned LineNum,
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
unsigned Order) {
|
2016-01-06 21:42:12 +08:00
|
|
|
assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
|
|
|
|
"only required for the device "
|
|
|
|
"code generation.");
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
|
2016-01-06 21:42:12 +08:00
|
|
|
OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
|
|
|
|
++OffloadingEntriesNum;
|
|
|
|
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
|
|
|
|
registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
|
|
|
|
StringRef ParentName, unsigned LineNum,
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
llvm::Constant *Addr, llvm::Constant *ID) {
|
2016-01-06 21:42:12 +08:00
|
|
|
// If we are emitting code for a target, the entry is already initialized,
|
|
|
|
// only has to be registered.
|
|
|
|
if (CGM.getLangOpts().OpenMPIsDevice) {
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
|
2016-01-06 21:42:12 +08:00
|
|
|
"Entry must exist.");
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
auto &Entry =
|
|
|
|
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
|
2016-01-06 21:42:12 +08:00
|
|
|
assert(Entry.isValid() && "Entry not initialized!");
|
|
|
|
Entry.setAddress(Addr);
|
|
|
|
Entry.setID(ID);
|
|
|
|
return;
|
|
|
|
} else {
|
|
|
|
OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
|
2016-01-06 21:42:12 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
unsigned DeviceID, unsigned FileID, StringRef ParentName,
|
|
|
|
unsigned LineNum) const {
|
2016-01-06 21:42:12 +08:00
|
|
|
auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
|
|
|
|
if (PerDevice == OffloadEntriesTargetRegion.end())
|
|
|
|
return false;
|
|
|
|
auto PerFile = PerDevice->second.find(FileID);
|
|
|
|
if (PerFile == PerDevice->second.end())
|
|
|
|
return false;
|
|
|
|
auto PerParentName = PerFile->second.find(ParentName);
|
|
|
|
if (PerParentName == PerFile->second.end())
|
|
|
|
return false;
|
|
|
|
auto PerLine = PerParentName->second.find(LineNum);
|
|
|
|
if (PerLine == PerParentName->second.end())
|
|
|
|
return false;
|
|
|
|
// Fail if this entry is already registered.
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
if (PerLine->second.getAddress() || PerLine->second.getID())
|
2016-01-06 21:42:12 +08:00
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
|
|
|
|
const OffloadTargetRegionEntryInfoActTy &Action) {
|
|
|
|
// Scan all target region entries and perform the provided action.
|
|
|
|
for (auto &D : OffloadEntriesTargetRegion)
|
|
|
|
for (auto &F : D.second)
|
|
|
|
for (auto &P : F.second)
|
|
|
|
for (auto &L : P.second)
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
Action(D.first, F.first, P.first(), L.first, L.second);
|
2016-01-06 21:42:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Create a Ctor/Dtor-like function whose body is emitted through
|
|
|
|
/// \a Codegen. This is used to emit the two functions that register and
|
|
|
|
/// unregister the descriptor of the current compilation unit.
|
|
|
|
static llvm::Function *
|
|
|
|
createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
|
|
|
|
const RegionCodeGenTy &Codegen) {
|
|
|
|
auto &C = CGM.getContext();
|
|
|
|
FunctionArgList Args;
|
|
|
|
ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
|
|
|
|
/*Id=*/nullptr, C.VoidPtrTy);
|
|
|
|
Args.push_back(&DummyPtr);
|
|
|
|
|
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
GlobalDecl();
|
2016-03-11 12:30:31 +08:00
|
|
|
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
|
2016-01-06 21:42:12 +08:00
|
|
|
auto FTy = CGM.getTypes().GetFunctionType(FI);
|
|
|
|
auto *Fn =
|
|
|
|
CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
|
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
|
|
|
|
Codegen(CGF);
|
|
|
|
CGF.FinishFunction();
|
|
|
|
return Fn;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Build the offloading binary descriptor and the function that
/// registers it with the offloading runtime.
///
/// Emits external begin/end symbols for the host entries section, one pair of
/// external image start/end symbols per offloading target triple, the device
/// images array and the descriptor itself. The returned function calls the
/// runtime's register-lib entry point and registers a global destructor that
/// calls the unregister-lib entry point.
///
/// \returns The registration function, or nullptr when compiling for the
/// device or when there are no offload entries.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // Nothing to do for device compilation or when no entries were recorded.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  auto &M = CGM.getModule();
  auto &C = CGM.getContext();

  // The offloading targets requested for this compilation. A descriptor is
  // only meaningful if at least one was specified.
  auto &Devices = CGM.getLangOpts().OMPTargetTriples;
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // External symbols delimiting the host entries section. These will be
  // defined by the linker.
  auto *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_begin");
  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_end");

  // One device image record per target triple.
  llvm::SmallVector<llvm::Constant *, 4> DeviceImageInits;
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));

  for (const auto &Device : Devices) {
    StringRef TripleStr = Device.getTriple();
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr,
        Twine(".omp_offloading.img_start.") + Twine(TripleStr));
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr,
        Twine(".omp_offloading.img_end.") + Twine(TripleStr));

    // Record layout: image start, image end, host entries begin/end.
    llvm::Constant *ImageInit =
        llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
                                  HostEntriesBegin, HostEntriesEnd, nullptr);
    DeviceImageInits.push_back(ImageInit);
  }

  // Global array holding all device image records.
  llvm::ArrayType *DeviceImagesArrayTy =
      llvm::ArrayType::get(DeviceImageTy, DeviceImageInits.size());
  llvm::Constant *DeviceImagesArrayInit =
      llvm::ConstantArray::get(DeviceImagesArrayTy, DeviceImageInits);
  llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
      M, DeviceImagesArrayTy, /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, DeviceImagesArrayInit,
      ".omp_offloading.device_images");
  DeviceImages->setUnnamedAddr(true);

  // Zero indices used to form a pointer to the first array element.
  llvm::Constant *ZeroIndices[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                                   llvm::Constant::getNullValue(CGM.Int32Ty)};

  // The descriptor: device count, pointer to the images array, and the host
  // entries begin/end symbols.
  auto *BinaryDescriptorTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
  llvm::Constant *NumDevices =
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size());
  llvm::Constant *FirstImage = llvm::ConstantExpr::getGetElementPtr(
      DeviceImagesArrayTy, DeviceImages, ZeroIndices);
  llvm::Constant *DescriptorInit =
      llvm::ConstantStruct::get(BinaryDescriptorTy, NumDevices, FirstImage,
                                HostEntriesBegin, HostEntriesEnd, nullptr);

  auto *Desc = new llvm::GlobalVariable(
      M, BinaryDescriptorTy, /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, DescriptorInit,
      ".omp_offloading.descriptor");

  // Emit code to register or unregister the descriptor at execution startup
  // or closing, respectively. A variable drives the pair so that the logic
  // that emits Ctors and Dtors can be reused.
  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
                                IdentInfo, C.CharTy);

  auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                             Desc);
      });
  auto *RegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
                             Desc);
        CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
      });
  return RegFn;
}
|
|
|
|
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to be visible (external linkage) for the device side. Using dots in the names of the entries can therefore be problematic for some toolchains, e.g. NVPTX.
Also, the patch drops the column information from the unique name of the entry points. The parsing of directives ignores unknown tokens, preventing several target regions from being implemented on the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
/// \brief Emit one offload entry record into the offloading entries section.
///
/// The record holds \a ID bitcast to void*, a pointer to a private string
/// holding the name of \a Addr, and \a Size. Note that \a Addr contributes
/// only its name; the pointer stored in the record is \a ID.
void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
                                         llvm::Constant *Addr, uint64_t Size) {
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  StringRef EntryName = Addr->getName();
  auto *EntryTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));

  // Make sure the address has the right type.
  llvm::Constant *AddrField = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);

  // Emit the entry name as an internal constant string.
  llvm::Constant *NameInit = llvm::ConstantDataArray::getString(C, EntryName);
  llvm::GlobalVariable *NameVar = new llvm::GlobalVariable(
      M, NameInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, NameInit,
      ".omp_offloading.entry_name");
  NameVar->setUnnamedAddr(true);
  llvm::Constant *NameField =
      llvm::ConstantExpr::getBitCast(NameVar, CGM.Int8PtrTy);

  // Assemble the record: address, name, size.
  llvm::Constant *SizeField = llvm::ConstantInt::get(CGM.SizeTy, Size);
  llvm::Constant *EntryInit = llvm::ConstantStruct::get(
      EntryTy, AddrField, NameField, SizeField, nullptr);
  llvm::GlobalVariable *EntryVar = new llvm::GlobalVariable(
      M, EntryTy, true, llvm::GlobalValue::ExternalLinkage, EntryInit,
      ".omp_offloading.entry");

  // The entry has to be created in the section the linker expects it to be.
  EntryVar->setSection(".omp_offloading.entries");
  // No padding is allowed between symbols in that section, hence the 1-byte
  // alignment.
  EntryVar->setAlignment(1);
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
|
|
|
|
// Emit the offloading entries and metadata so that the device codegen side
|
|
|
|
// can
|
|
|
|
// easily figure out what to emit. The produced metadata looks like this:
|
|
|
|
//
|
|
|
|
// !omp_offload.info = !{!1, ...}
|
|
|
|
//
|
|
|
|
// Right now we only generate metadata for function that contain target
|
|
|
|
// regions.
|
|
|
|
|
|
|
|
// If we do not have entries, we dont need to do anything.
|
|
|
|
if (OffloadEntriesInfoManager.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
llvm::Module &M = CGM.getModule();
|
|
|
|
llvm::LLVMContext &C = M.getContext();
|
|
|
|
SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
|
|
|
|
OrderedEntries(OffloadEntriesInfoManager.size());
|
|
|
|
|
|
|
|
// Create the offloading info metadata node.
|
|
|
|
llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
|
|
|
|
|
|
|
|
// Auxiliar methods to create metadata values and strings.
|
|
|
|
auto getMDInt = [&](unsigned v) {
|
|
|
|
return llvm::ConstantAsMetadata::get(
|
|
|
|
llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
|
|
|
|
};
|
|
|
|
|
|
|
|
auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
|
|
|
|
|
|
|
|
// Create function that emits metadata for each target region entry;
|
|
|
|
auto &&TargetRegionMetadataEmitter = [&](
|
|
|
|
unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
|
|
|
|
OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
|
|
|
|
llvm::SmallVector<llvm::Metadata *, 32> Ops;
|
|
|
|
// Generate metadata for target regions. Each entry of this metadata
|
|
|
|
// contains:
|
|
|
|
// - Entry 0 -> Kind of this type of metadata (0).
|
|
|
|
// - Entry 1 -> Device ID of the file where the entry was identified.
|
|
|
|
// - Entry 2 -> File ID of the file where the entry was identified.
|
|
|
|
// - Entry 3 -> Mangled name of the function where the entry was identified.
|
|
|
|
// - Entry 4 -> Line in the file where the entry was identified.
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
// - Entry 5 -> Order the entry was created.
|
2016-01-06 21:42:12 +08:00
|
|
|
// The first element of the metadata node is the kind.
|
|
|
|
Ops.push_back(getMDInt(E.getKind()));
|
|
|
|
Ops.push_back(getMDInt(DeviceID));
|
|
|
|
Ops.push_back(getMDInt(FileID));
|
|
|
|
Ops.push_back(getMDString(ParentName));
|
|
|
|
Ops.push_back(getMDInt(Line));
|
|
|
|
Ops.push_back(getMDInt(E.getOrder()));
|
|
|
|
|
|
|
|
// Save this entry in the right position of the ordered entries array.
|
|
|
|
OrderedEntries[E.getOrder()] = &E;
|
|
|
|
|
|
|
|
// Add metadata to the named metadata node.
|
|
|
|
MD->addOperand(llvm::MDNode::get(C, Ops));
|
|
|
|
};
|
|
|
|
|
|
|
|
OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
|
|
|
|
TargetRegionMetadataEmitter);
|
|
|
|
|
|
|
|
for (auto *E : OrderedEntries) {
|
|
|
|
assert(E && "All ordered entries must exist!");
|
|
|
|
if (auto *CE =
|
|
|
|
dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
|
|
|
|
E)) {
|
|
|
|
assert(CE->getID() && CE->getAddress() &&
|
|
|
|
"Entry ID and Addr are invalid!");
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
|
2016-01-06 21:42:12 +08:00
|
|
|
} else
|
|
|
|
llvm_unreachable("Unsupported entry kind.");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Loads all the offload entries information from the host IR
|
|
|
|
/// metadata.
|
|
|
|
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
|
|
|
|
// If we are in target mode, load the metadata from the host IR. This code has
|
|
|
|
// to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
|
|
|
|
|
|
|
|
if (!CGM.getLangOpts().OpenMPIsDevice)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (CGM.getLangOpts().OMPHostIRFile.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
|
|
|
|
if (Buf.getError())
|
|
|
|
return;
|
|
|
|
|
|
|
|
llvm::LLVMContext C;
|
|
|
|
auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);
|
|
|
|
|
|
|
|
if (ME.getError())
|
|
|
|
return;
|
|
|
|
|
|
|
|
llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
|
|
|
|
if (!MD)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (auto I : MD->operands()) {
|
|
|
|
llvm::MDNode *MN = cast<llvm::MDNode>(I);
|
|
|
|
|
|
|
|
auto getMDInt = [&](unsigned Idx) {
|
|
|
|
llvm::ConstantAsMetadata *V =
|
|
|
|
cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
|
|
|
|
return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
|
|
|
|
};
|
|
|
|
|
|
|
|
auto getMDString = [&](unsigned Idx) {
|
|
|
|
llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
|
|
|
|
return V->getString();
|
|
|
|
};
|
|
|
|
|
|
|
|
switch (getMDInt(0)) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unexpected metadata!");
|
|
|
|
break;
|
|
|
|
case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
|
|
|
|
OFFLOAD_ENTRY_INFO_TARGET_REGION:
|
|
|
|
OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
|
|
|
|
/*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
|
|
|
|
/*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
/*Order=*/getMDInt(5));
|
2016-01-06 21:42:12 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
/// \brief Builds the kmp_routine_entry_t pointer type, both as a clang type
/// and as an LLVM type, the first time it is requested.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  // Already built by an earlier call; keep the cached types.
  if (KmpRoutineEntryPtrTy)
    return;

  // typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
  auto &Ctx = CGM.getContext();
  QualType ParamTys[] = {KmpInt32Ty, Ctx.VoidPtrTy};
  FunctionProtoType::ExtProtoInfo ProtoInfo;
  QualType RoutineFnTy = Ctx.getFunctionType(KmpInt32Ty, ParamTys, ProtoInfo);
  KmpRoutineEntryPtrQTy = Ctx.getPointerType(RoutineFnTy);
  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
}
|
|
|
|
|
2015-09-11 18:29:41 +08:00
|
|
|
/// \brief Appends an unnamed, public, non-mutable field of type \p FieldTy
/// (no bit width, no in-class initializer) to \p DC and returns it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  // The field is synthesized, so every location is the invalid one.
  const SourceLocation NoLoc;
  auto *TInfo = C.getTrivialTypeSourceInfo(FieldTy, NoLoc);
  auto *NewField = FieldDecl::Create(C, DC, /*StartLoc=*/NoLoc,
                                     /*IdLoc=*/NoLoc, /*Id=*/nullptr, FieldTy,
                                     TInfo, /*BW=*/nullptr, /*Mutable=*/false,
                                     /*InitStyle=*/ICIS_NoInit);
  NewField->setAccess(AS_public);
  DC->addDecl(NewField);
  return NewField;
}
|
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
/// \brief Returns the type of struct __tgt_offload_entry, building the
/// implicit record on first use.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // The record is created once and then reused.
  if (!TgtOffloadEntryQTy.isNull())
    return TgtOffloadEntryQTy;

  // This is the type we have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  // };
  ASTContext &Ctx = CGM.getContext();
  auto *EntryRD = Ctx.buildImplicitRecord("__tgt_offload_entry");
  EntryRD->startDefinition();
  // addr
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.VoidPtrTy);
  // name
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.getPointerType(Ctx.CharTy));
  // size
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.getSizeType());
  EntryRD->completeDefinition();

  TgtOffloadEntryQTy = Ctx.getRecordType(EntryRD);
  return TgtOffloadEntryQTy;
}
|
|
|
|
|
|
|
|
/// \brief Returns the type of struct __tgt_device_image, building the
/// implicit record on first use.
QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // The record is created once and then reused.
  if (!TgtDeviceImageQTy.isNull())
    return TgtDeviceImageQTy;

  // These are the types we need to build:
  // struct __tgt_device_image{
  // void   *ImageStart;       // Pointer to the target code start.
  // void   *ImageEnd;         // Pointer to the target code end.
  // // We also add the host entries to the device image, as it may be useful
  // // for the target runtime to have access to that information.
  // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
  //                                       // the entries.
  // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                       // entries (non inclusive).
  // };
  ASTContext &Ctx = CGM.getContext();
  auto *ImageRD = Ctx.buildImplicitRecord("__tgt_device_image");
  ImageRD->startDefinition();
  // ImageStart, ImageEnd
  addFieldToRecordDecl(Ctx, ImageRD, Ctx.VoidPtrTy);
  addFieldToRecordDecl(Ctx, ImageRD, Ctx.VoidPtrTy);
  // EntriesBegin, EntriesEnd share the same pointer-to-entry type.
  QualType EntryPtrTy = Ctx.getPointerType(getTgtOffloadEntryQTy());
  addFieldToRecordDecl(Ctx, ImageRD, EntryPtrTy);
  addFieldToRecordDecl(Ctx, ImageRD, EntryPtrTy);
  ImageRD->completeDefinition();

  TgtDeviceImageQTy = Ctx.getRecordType(ImageRD);
  return TgtDeviceImageQTy;
}
|
|
|
|
|
|
|
|
/// \brief Returns the type of struct __tgt_bin_desc, building the implicit
/// record on first use.
QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // The record is created once and then reused.
  if (!TgtBinaryDescriptorQTy.isNull())
    return TgtBinaryDescriptorQTy;

  // struct __tgt_bin_desc{
  //   int32_t              NumDevices;      // Number of devices supported.
  //   __tgt_device_image   *DeviceImages;   // Arrays of device images
  //                                         // (one per device).
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
  //                                         // entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  ASTContext &Ctx = CGM.getContext();
  auto *DescRD = Ctx.buildImplicitRecord("__tgt_bin_desc");
  DescRD->startDefinition();
  // NumDevices
  addFieldToRecordDecl(
      Ctx, DescRD,
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
  // DeviceImages
  addFieldToRecordDecl(Ctx, DescRD,
                       Ctx.getPointerType(getTgtDeviceImageQTy()));
  // EntriesBegin, EntriesEnd share the same pointer-to-entry type.
  QualType EntryPtrTy = Ctx.getPointerType(getTgtOffloadEntryQTy());
  addFieldToRecordDecl(Ctx, DescRD, EntryPtrTy);
  addFieldToRecordDecl(Ctx, DescRD, EntryPtrTy);
  DescRD->completeDefinition();

  TgtBinaryDescriptorQTy = Ctx.getRecordType(DescRD);
  return TgtBinaryDescriptorQTy;
}
|
|
|
|
|
2015-04-30 14:51:57 +08:00
|
|
|
namespace {
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
struct PrivateHelpersTy {
|
|
|
|
PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
|
|
|
|
const VarDecl *PrivateElemInit)
|
|
|
|
: Original(Original), PrivateCopy(PrivateCopy),
|
|
|
|
PrivateElemInit(PrivateElemInit) {}
|
|
|
|
const VarDecl *Original;
|
|
|
|
const VarDecl *PrivateCopy;
|
|
|
|
const VarDecl *PrivateElemInit;
|
|
|
|
};
|
|
|
|
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
|
2015-09-11 01:07:54 +08:00
|
|
|
} // anonymous namespace
|
2015-04-30 14:51:57 +08:00
|
|
|
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
/// \brief Builds the implicit record that holds one field per entry of
/// \p Privates, or returns nullptr when \p Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  // No privates means no record at all.
  if (Privates.empty())
    return nullptr;

  auto &Ctx = CGM.getContext();
  // Build struct .kmp_privates_t. {
  //         /*  private vars  */
  //       };
  auto *PrivatesRD = Ctx.buildImplicitRecord(".kmp_privates.t");
  PrivatesRD->startDefinition();
  for (auto &&Entry : Privates) {
    auto *OrigVD = Entry.second.Original;
    // The field stores the value itself, so a reference type is stripped.
    auto FieldTy = OrigVD->getType();
    FieldTy = FieldTy.getNonReferenceType();
    auto *Field = addFieldToRecordDecl(Ctx, PrivatesRD, FieldTy);
    if (!OrigVD->hasAttrs())
      continue;
    // Carry the aligned attributes of the variable over to its field.
    for (specific_attr_iterator<AlignedAttr>
             I(OrigVD->getAttrs().begin()),
         E(OrigVD->getAttrs().end());
         I != E; ++I)
      Field->addAttr(*I);
  }
  PrivatesRD->completeDefinition();
  return PrivatesRD;
}
|
|
|
|
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
/// \brief Builds the implicit record for kmp_task_t with its four fields.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  auto &Ctx = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_routine_entry_t destructors;
  //       };
  auto *TaskRD = Ctx.buildImplicitRecord("kmp_task_t");
  TaskRD->startDefinition();
  // shareds
  addFieldToRecordDecl(Ctx, TaskRD, Ctx.VoidPtrTy);
  // routine
  addFieldToRecordDecl(Ctx, TaskRD, KmpRoutineEntryPointerQTy);
  // part_id
  addFieldToRecordDecl(Ctx, TaskRD, KmpInt32Ty);
  // destructors
  addFieldToRecordDecl(Ctx, TaskRD, KmpRoutineEntryPointerQTy);
  TaskRD->completeDefinition();
  return TaskRD;
}
|
|
|
|
|
|
|
|
/// \brief Builds the implicit record that wraps kmp_task_t together with the
/// privates record; the privates field is left out when \p Privates is empty.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  auto &Ctx = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  auto *WrapperRD = Ctx.buildImplicitRecord("kmp_task_t_with_privates");
  WrapperRD->startDefinition();
  // task_data
  addFieldToRecordDecl(Ctx, WrapperRD, KmpTaskTQTy);
  // privates, only present when a privates record was created.
  auto *PrivatesRD = createPrivatesRecordDecl(CGM, Privates);
  if (PrivatesRD)
    addFieldToRecordDecl(Ctx, WrapperRD, Ctx.getRecordType(PrivatesRD));
  WrapperRD->completeDefinition();
  return WrapperRD;
}
|
|
|
|
|
|
|
|
/// \brief Emit a proxy function which accepts kmp_task_t as the second
|
|
|
|
/// argument.
|
|
|
|
/// \code
|
|
|
|
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
|
|
|
|
/// tt->shareds);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
/// return 0;
|
|
|
|
/// }
|
|
|
|
/// \endcode
|
|
|
|
static llvm::Value *
|
|
|
|
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
|
2015-05-18 15:54:53 +08:00
|
|
|
QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
|
|
|
|
QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
QualType SharedsPtrTy, llvm::Value *TaskFunction,
|
|
|
|
llvm::Value *TaskPrivatesMap) {
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto &C = CGM.getContext();
|
|
|
|
FunctionArgList Args;
|
|
|
|
ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
|
|
|
|
ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
|
2015-09-10 16:12:02 +08:00
|
|
|
/*Id=*/nullptr,
|
|
|
|
KmpTaskTWithPrivatesPtrQTy.withRestrict());
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
Args.push_back(&GtidArg);
|
|
|
|
Args.push_back(&TaskTypeArg);
|
|
|
|
auto &TaskEntryFnInfo =
|
2016-03-11 12:30:31 +08:00
|
|
|
CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
|
|
|
|
auto *TaskEntry =
|
|
|
|
llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
|
|
|
|
".omp_task_entry.", &CGM.getModule());
|
2015-10-28 10:30:47 +08:00
|
|
|
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
CGF.disableDebugInfo();
|
|
|
|
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
|
|
|
|
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
// TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
|
|
|
|
// tt->task_data.shareds);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto *GtidParam = CGF.EmitLoadOfScalar(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
|
2016-02-04 19:27:03 +08:00
|
|
|
LValue TDBase = CGF.EmitLoadOfPointerLValue(
|
|
|
|
CGF.GetAddrOfLocalVar(&TaskTypeArg),
|
|
|
|
KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
|
2015-05-18 15:54:53 +08:00
|
|
|
auto *KmpTaskTWithPrivatesQTyRD =
|
|
|
|
cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
LValue Base =
|
|
|
|
CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
|
2015-05-18 15:54:53 +08:00
|
|
|
auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
|
|
|
|
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
|
|
|
|
auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
|
|
|
|
auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
|
|
|
|
|
|
|
|
auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
|
|
|
|
auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
|
2015-04-30 14:51:57 +08:00
|
|
|
auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
2015-05-18 15:54:53 +08:00
|
|
|
CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
|
2015-04-30 14:51:57 +08:00
|
|
|
CGF.ConvertTypeForMem(SharedsPtrTy));
|
|
|
|
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
|
|
|
|
llvm::Value *PrivatesParam;
|
|
|
|
if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
|
|
|
|
auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
|
|
|
|
PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
PrivatesLVal.getPointer(), CGF.VoidPtrTy);
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
} else {
|
|
|
|
PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
|
|
|
|
TaskPrivatesMap, SharedsParam};
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
|
|
|
|
CGF.EmitStoreThroughLValue(
|
|
|
|
RValue::get(CGF.Builder.getInt32(/*C=*/0)),
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
CGF.FinishFunction();
|
|
|
|
return TaskEntry;
|
|
|
|
}
|
|
|
|
|
2015-05-18 15:54:53 +08:00
|
|
|
/// \brief Emit the internal '.omp_task_destructor.' helper for a task.
///
/// The generated function takes a thread id of type \a KmpInt32Ty and a
/// restrict-qualified pointer of type \a KmpTaskTWithPrivatesPtrQTy. Its body
/// selects the second field of the \a KmpTaskTWithPrivatesQTy record and, for
/// every member of that field's record type whose type needs destruction,
/// pushes a destroy for the member.
///
/// \param CGM Module the helper is emitted into.
/// \param Loc Source location used for the implicit parameters.
/// \param KmpInt32Ty Type of the thread id parameter and of the result.
/// \param KmpTaskTWithPrivatesPtrQTy Pointer type of the task parameter.
/// \param KmpTaskTWithPrivatesQTy Record type the task parameter points to.
/// \return The newly created function.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_destructor.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
                                    DestructorFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args);

  // Load the task pointer argument and form an lvalue for the pointee record.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Step to the second field of the record.
  // NOTE(review): assumes the record always has a second field here; there is
  // no field_end() check in this function - confirm against the caller.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Only members whose type reports a destruction kind need a destroy.
    if (auto DtorKind = Field->getType().isDestructedType()) {
      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  // NOTE(review): nothing is stored to CGF.ReturnValue before finishing, even
  // though the function is declared to return KmpInt32Ty - confirm the result
  // of this helper is ignored by whoever invokes it.
  CGF.FinishFunction();
  return DestructorFn;
}
|
|
|
|
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
/// \brief Emit a privates mapping function for correct handling of private and
|
|
|
|
/// firstprivate variables.
|
|
|
|
/// \code
|
|
|
|
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
|
|
|
|
/// **noalias priv1,..., <tyn> **noalias privn) {
|
|
|
|
/// *priv1 = &.privates.priv1;
|
|
|
|
/// ...;
|
|
|
|
/// *privn = &.privates.privn;
|
|
|
|
/// }
|
|
|
|
/// \endcode
|
|
|
|
static llvm::Value *
|
|
|
|
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
|
2015-09-29 12:30:07 +08:00
|
|
|
ArrayRef<const Expr *> PrivateVars,
|
|
|
|
ArrayRef<const Expr *> FirstprivateVars,
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
QualType PrivatesQTy,
|
2015-09-29 12:30:07 +08:00
|
|
|
ArrayRef<PrivateDataTy> Privates) {
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
auto &C = CGM.getContext();
|
|
|
|
FunctionArgList Args;
|
|
|
|
ImplicitParamDecl TaskPrivatesArg(
|
|
|
|
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
|
|
|
|
C.getPointerType(PrivatesQTy).withConst().withRestrict());
|
|
|
|
Args.push_back(&TaskPrivatesArg);
|
|
|
|
llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
|
|
|
|
unsigned Counter = 1;
|
|
|
|
for (auto *E: PrivateVars) {
|
|
|
|
Args.push_back(ImplicitParamDecl::Create(
|
|
|
|
C, /*DC=*/nullptr, Loc,
|
|
|
|
/*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
|
|
|
|
.withConst()
|
|
|
|
.withRestrict()));
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
PrivateVarsPos[VD] = Counter;
|
|
|
|
++Counter;
|
|
|
|
}
|
|
|
|
for (auto *E : FirstprivateVars) {
|
|
|
|
Args.push_back(ImplicitParamDecl::Create(
|
|
|
|
C, /*DC=*/nullptr, Loc,
|
|
|
|
/*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
|
|
|
|
.withConst()
|
|
|
|
.withRestrict()));
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
PrivateVarsPos[VD] = Counter;
|
|
|
|
++Counter;
|
|
|
|
}
|
|
|
|
auto &TaskPrivatesMapFnInfo =
|
2016-03-11 12:30:31 +08:00
|
|
|
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
auto *TaskPrivatesMapTy =
|
|
|
|
CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
|
|
|
|
auto *TaskPrivatesMap = llvm::Function::Create(
|
|
|
|
TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
|
|
|
|
".omp_task_privates_map.", &CGM.getModule());
|
2015-10-28 10:30:47 +08:00
|
|
|
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
|
|
|
|
TaskPrivatesMapFnInfo);
|
2015-09-15 05:35:16 +08:00
|
|
|
TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
CGF.disableDebugInfo();
|
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
|
|
|
|
TaskPrivatesMapFnInfo, Args);
|
|
|
|
|
|
|
|
// *privi = &.privates.privi;
|
2016-02-04 19:27:03 +08:00
|
|
|
LValue Base = CGF.EmitLoadOfPointerLValue(
|
|
|
|
CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
|
|
|
|
TaskPrivatesArg.getType()->castAs<PointerType>());
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
|
|
|
|
Counter = 0;
|
|
|
|
for (auto *Field : PrivatesQTyRD->fields()) {
|
|
|
|
auto FieldLVal = CGF.EmitLValueForField(Base, Field);
|
|
|
|
auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
|
2016-02-04 19:27:03 +08:00
|
|
|
auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
|
|
|
|
RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
|
2015-09-10 16:12:02 +08:00
|
|
|
CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
++Counter;
|
|
|
|
}
|
|
|
|
CGF.FinishFunction();
|
|
|
|
return TaskPrivatesMap;
|
|
|
|
}
|
|
|
|
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that of other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
static int array_pod_sort_comparator(const PrivateDataTy *P1,
|
|
|
|
const PrivateDataTy *P2) {
|
2015-04-30 14:51:57 +08:00
|
|
|
return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
|
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
void CGOpenMPRuntime::emitTaskCall(
|
2015-04-30 14:51:57 +08:00
|
|
|
CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
|
|
|
|
bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
|
|
|
|
ArrayRef<const Expr *> PrivateCopies,
|
|
|
|
ArrayRef<const Expr *> FirstprivateVars,
|
|
|
|
ArrayRef<const Expr *> FirstprivateCopies,
|
|
|
|
ArrayRef<const Expr *> FirstprivateInits,
|
|
|
|
ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto &C = CGM.getContext();
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
llvm::SmallVector<PrivateDataTy, 8> Privates;
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
// Aggregate privates and sort them by the alignment.
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
auto I = PrivateCopies.begin();
|
2015-04-30 14:51:57 +08:00
|
|
|
for (auto *E : PrivateVars) {
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
Privates.push_back(std::make_pair(
|
2015-09-11 18:29:41 +08:00
|
|
|
C.getDeclAlign(VD),
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
|
|
|
|
/*PrivateElemInit=*/nullptr)));
|
2015-04-30 14:51:57 +08:00
|
|
|
++I;
|
|
|
|
}
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
I = FirstprivateCopies.begin();
|
|
|
|
auto IElemInitRef = FirstprivateInits.begin();
|
|
|
|
for (auto *E : FirstprivateVars) {
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
Privates.push_back(std::make_pair(
|
2015-09-11 18:29:41 +08:00
|
|
|
C.getDeclAlign(VD),
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
PrivateHelpersTy(
|
|
|
|
VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
|
2016-02-19 06:34:54 +08:00
|
|
|
++I;
|
|
|
|
++IElemInitRef;
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
}
|
2015-04-30 14:51:57 +08:00
|
|
|
llvm::array_pod_sort(Privates.begin(), Privates.end(),
|
|
|
|
array_pod_sort_comparator);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
|
|
|
|
// Build type kmp_routine_entry_t (if not built yet).
|
|
|
|
emitKmpRoutineEntryT(KmpInt32Ty);
|
2015-05-18 15:54:53 +08:00
|
|
|
// Build type kmp_task_t (if not built yet).
|
|
|
|
if (KmpTaskTQTy.isNull()) {
|
|
|
|
KmpTaskTQTy = C.getRecordType(
|
|
|
|
createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
|
|
|
|
}
|
|
|
|
auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// Build particular struct kmp_task_t for the given task.
|
2015-05-18 15:54:53 +08:00
|
|
|
auto *KmpTaskTWithPrivatesQTyRD =
|
|
|
|
createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
|
|
|
|
auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
|
|
|
|
QualType KmpTaskTWithPrivatesPtrQTy =
|
|
|
|
C.getPointerType(KmpTaskTWithPrivatesQTy);
|
|
|
|
auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
|
|
|
|
auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
|
2016-01-26 20:20:39 +08:00
|
|
|
auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
QualType SharedsPtrTy = C.getPointerType(SharedsTy);
|
|
|
|
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
// Emit initial values for private copies (if any).
|
|
|
|
llvm::Value *TaskPrivatesMap = nullptr;
|
|
|
|
auto *TaskPrivatesMapTy =
|
|
|
|
std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
|
|
|
|
3)
|
|
|
|
->getType();
|
|
|
|
if (!Privates.empty()) {
|
|
|
|
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
|
|
|
|
TaskPrivatesMap = emitTaskPrivateMappingFunction(
|
|
|
|
CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
|
|
|
|
TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
TaskPrivatesMap, TaskPrivatesMapTy);
|
|
|
|
} else {
|
|
|
|
TaskPrivatesMap = llvm::ConstantPointerNull::get(
|
|
|
|
cast<llvm::PointerType>(TaskPrivatesMapTy));
|
|
|
|
}
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
|
|
|
|
// kmp_task_t *tt);
|
2015-05-18 15:54:53 +08:00
|
|
|
auto *TaskEntry = emitProxyTaskFunction(
|
|
|
|
CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
|
|
|
|
// Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
|
|
|
|
// kmp_routine_entry_t *task_entry);
|
|
|
|
// Task flags. Format is taken from
|
|
|
|
// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
|
|
|
|
// description of kmp_tasking_flags struct.
|
|
|
|
const unsigned TiedFlag = 0x1;
|
|
|
|
const unsigned FinalFlag = 0x2;
|
|
|
|
unsigned Flags = Tied ? TiedFlag : 0;
|
|
|
|
auto *TaskFlags =
|
|
|
|
Final.getPointer()
|
|
|
|
? CGF.Builder.CreateSelect(Final.getPointer(),
|
|
|
|
CGF.Builder.getInt32(FinalFlag),
|
|
|
|
CGF.Builder.getInt32(/*C=*/0))
|
|
|
|
: CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
|
|
|
|
TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
|
2015-11-24 21:01:44 +08:00
|
|
|
auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
|
2015-10-08 17:10:53 +08:00
|
|
|
llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
|
|
|
|
getThreadID(CGF, Loc), TaskFlags,
|
|
|
|
KmpTaskTWithPrivatesTySize, SharedsSize,
|
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
TaskEntry, KmpRoutineEntryPtrTy)};
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto *NewTask = CGF.EmitRuntimeCall(
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
|
2015-05-18 15:54:53 +08:00
|
|
|
auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
NewTask, KmpTaskTWithPrivatesPtrTy);
|
|
|
|
LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
|
|
|
|
KmpTaskTWithPrivatesQTy);
|
|
|
|
LValue TDBase =
|
|
|
|
CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// Fill the data in the resulting kmp_task_t record.
|
|
|
|
// Copy shareds if there are any.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address KmpTaskSharedsPtr = Address::invalid();
|
2015-05-18 15:54:53 +08:00
|
|
|
if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
|
2015-09-10 16:12:02 +08:00
|
|
|
KmpTaskSharedsPtr =
|
|
|
|
Address(CGF.EmitLoadOfScalar(
|
|
|
|
CGF.EmitLValueForField(
|
|
|
|
TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
|
|
|
|
KmpTaskTShareds)),
|
|
|
|
Loc),
|
|
|
|
CGF.getNaturalTypeAlignment(SharedsTy));
|
2015-04-30 14:51:57 +08:00
|
|
|
CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
|
2015-05-18 15:54:53 +08:00
|
|
|
}
|
2015-04-30 14:51:57 +08:00
|
|
|
// Emit initial values for private copies (if any).
|
|
|
|
bool NeedsCleanup = false;
|
|
|
|
if (!Privates.empty()) {
|
2015-05-18 15:54:53 +08:00
|
|
|
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
|
|
|
|
auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
|
2015-04-30 14:51:57 +08:00
|
|
|
FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
LValue SharedsBase;
|
|
|
|
if (!FirstprivateVars.empty()) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
SharedsBase = CGF.MakeAddrLValue(
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
|
|
|
|
SharedsTy);
|
|
|
|
}
|
2015-04-30 14:51:57 +08:00
|
|
|
CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
|
|
|
|
cast<CapturedStmt>(*D.getAssociatedStmt()));
|
|
|
|
for (auto &&Pair : Privates) {
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
auto *VD = Pair.second.PrivateCopy;
|
2015-04-30 14:51:57 +08:00
|
|
|
auto *Init = VD->getAnyInitializer();
|
2015-05-18 15:54:53 +08:00
|
|
|
LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
|
2015-04-30 14:51:57 +08:00
|
|
|
if (Init) {
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
if (auto *Elem = Pair.second.PrivateElemInit) {
|
|
|
|
auto *OriginalVD = Pair.second.Original;
|
|
|
|
auto *SharedField = CapturesInfo.lookup(OriginalVD);
|
|
|
|
auto SharedRefLValue =
|
|
|
|
CGF.EmitLValueForField(SharedsBase, SharedField);
|
2015-09-11 18:29:41 +08:00
|
|
|
SharedRefLValue = CGF.MakeAddrLValue(
|
|
|
|
Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
|
|
|
|
SharedRefLValue.getType(), AlignmentSource::Decl);
|
2015-05-19 20:31:28 +08:00
|
|
|
QualType Type = OriginalVD->getType();
|
|
|
|
if (Type->isArrayType()) {
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
// Initialize firstprivate array.
|
|
|
|
if (!isa<CXXConstructExpr>(Init) ||
|
|
|
|
CGF.isTrivialInitializer(Init)) {
|
|
|
|
// Perform simple memcpy.
|
|
|
|
CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
|
2015-05-19 20:31:28 +08:00
|
|
|
SharedRefLValue.getAddress(), Type);
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
} else {
|
|
|
|
// Initialize firstprivate array using element-by-element
|
|
|
|
// initialization.
|
|
|
|
CGF.EmitOMPAggregateAssign(
|
|
|
|
PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
|
2015-05-19 20:31:28 +08:00
|
|
|
Type, [&CGF, Elem, Init, &CapturesInfo](
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address DestElement, Address SrcElement) {
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
// Clean up any temporaries needed by the initialization.
|
|
|
|
CodeGenFunction::OMPPrivateScope InitScope(CGF);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
InitScope.addPrivate(Elem, [SrcElement]() -> Address {
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
return SrcElement;
|
|
|
|
});
|
|
|
|
(void)InitScope.Privatize();
|
|
|
|
// Emit initialization for single element.
|
2015-06-24 11:35:38 +08:00
|
|
|
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
|
|
|
|
CGF, &CapturesInfo);
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
CGF.EmitAnyExprToMem(Init, DestElement,
|
|
|
|
Init->getType().getQualifiers(),
|
|
|
|
/*IsInitializer=*/false);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
CodeGenFunction::OMPPrivateScope InitScope(CGF);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
return SharedRefLValue.getAddress();
|
|
|
|
});
|
|
|
|
(void)InitScope.Privatize();
|
2015-06-24 11:35:38 +08:00
|
|
|
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructor calls for all privates after the end of the task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
CGF.EmitExprAsInit(Init, VD, PrivateLValue,
|
|
|
|
/*capturedByInit=*/false);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
|
|
|
|
}
|
2015-04-30 14:51:57 +08:00
|
|
|
}
|
|
|
|
NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
++FI;
|
2015-04-30 14:51:57 +08:00
|
|
|
}
|
|
|
|
}
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to the destructors function to the destructors field of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// Provide pointer to function with destructors for privates.
|
2015-04-30 14:51:57 +08:00
|
|
|
llvm::Value *DestructorFn =
|
2015-05-18 15:54:53 +08:00
|
|
|
NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
|
|
|
|
KmpTaskTWithPrivatesPtrQTy,
|
|
|
|
KmpTaskTWithPrivatesQTy)
|
|
|
|
: llvm::ConstantPointerNull::get(
|
|
|
|
cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
|
|
|
|
LValue Destructor = CGF.EmitLValueForField(
|
|
|
|
TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
|
|
|
|
CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
DestructorFn, KmpRoutineEntryPtrTy),
|
|
|
|
Destructor);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
|
|
|
|
// Process list of dependences.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address DependenciesArray = Address::invalid();
|
|
|
|
unsigned NumDependencies = Dependences.size();
|
|
|
|
if (NumDependencies) {
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// Dependence kind for RTL.
|
2015-11-23 21:33:42 +08:00
|
|
|
enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
|
|
|
|
RecordDecl *KmpDependInfoRD;
|
2015-10-08 17:10:53 +08:00
|
|
|
QualType FlagsTy =
|
|
|
|
C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
|
|
|
|
if (KmpDependInfoTy.isNull()) {
|
|
|
|
KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
|
|
|
|
KmpDependInfoRD->startDefinition();
|
|
|
|
addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
|
|
|
|
addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
|
|
|
|
addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
|
|
|
|
KmpDependInfoRD->completeDefinition();
|
|
|
|
KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
|
|
|
|
} else {
|
|
|
|
KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
|
|
|
|
}
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// Define type kmp_depend_info[<Dependences.size()>];
|
|
|
|
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
ArrayType::Normal, /*IndexTypeQuals=*/0);
|
|
|
|
// kmp_depend_info[<Dependences.size()>] deps;
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
|
|
|
|
for (unsigned i = 0; i < NumDependencies; ++i) {
|
|
|
|
const Expr *E = Dependences[i].second;
|
|
|
|
auto Addr = CGF.EmitLValue(E);
|
2015-08-31 15:32:19 +08:00
|
|
|
llvm::Value *Size;
|
|
|
|
QualType Ty = E->getType();
|
|
|
|
if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
|
|
|
|
LValue UpAddrLVal =
|
|
|
|
CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
|
|
|
|
llvm::Value *UpAddr =
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
|
2015-08-31 15:32:19 +08:00
|
|
|
llvm::Value *LowIntPtr =
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
|
2015-08-31 15:32:19 +08:00
|
|
|
llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
|
|
|
|
Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
|
2015-10-08 17:10:53 +08:00
|
|
|
} else
|
2016-01-26 20:20:39 +08:00
|
|
|
Size = CGF.getTypeSize(Ty);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto Base = CGF.MakeAddrLValue(
|
|
|
|
CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
KmpDependInfoTy);
|
|
|
|
// deps[i].base_addr = &<Dependences[i].second>;
|
|
|
|
auto BaseAddrLVal = CGF.EmitLValueForField(
|
|
|
|
Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.EmitStoreOfScalar(
|
|
|
|
CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
|
|
|
|
BaseAddrLVal);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has an associated 'if' clause, then before the call to __kmpc_omp_task_begin_if0() the function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must also be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// deps[i].len = sizeof(<Dependences[i].second>);
|
|
|
|
auto LenLVal = CGF.EmitLValueForField(
|
|
|
|
Base, *std::next(KmpDependInfoRD->field_begin(), Len));
|
|
|
|
CGF.EmitStoreOfScalar(Size, LenLVal);
|
|
|
|
// deps[i].flags = <Dependences[i].first>;
|
|
|
|
RTLDependenceKindTy DepKind;
|
|
|
|
switch (Dependences[i].first) {
|
|
|
|
case OMPC_DEPEND_in:
|
|
|
|
DepKind = DepIn;
|
|
|
|
break;
|
2015-11-23 21:33:42 +08:00
|
|
|
// Out and InOut dependencies must use the same code.
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has an associated 'if' clause, then before the call to __kmpc_omp_task_begin_if0() the function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must also be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
case OMPC_DEPEND_out:
|
|
|
|
case OMPC_DEPEND_inout:
|
|
|
|
DepKind = DepInOut;
|
|
|
|
break;
|
2015-12-18 13:05:56 +08:00
|
|
|
case OMPC_DEPEND_source:
|
2015-12-23 18:27:45 +08:00
|
|
|
case OMPC_DEPEND_sink:
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has an associated 'if' clause, then before the call to __kmpc_omp_task_begin_if0() the function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must also be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
case OMPC_DEPEND_unknown:
|
|
|
|
llvm_unreachable("Unknown task dependence type");
|
|
|
|
}
|
|
|
|
auto FlagsLVal = CGF.EmitLValueForField(
|
|
|
|
Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
|
|
|
|
CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
|
|
|
|
FlagsLVal);
|
|
|
|
}
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has an associated 'if' clause, then before the call to __kmpc_omp_task_begin_if0() the function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must also be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
CGF.VoidPtrTy);
|
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
|
|
|
|
// libcall.
|
|
|
|
// Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
|
|
|
|
// *new_task);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has an associated 'if' clause, then before the call to __kmpc_omp_task_begin_if0() the function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must also be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
|
|
|
|
// kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
|
|
|
|
// list is not empty
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
auto *ThreadID = getThreadID(CGF, Loc);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has an associated 'if' clause, then before the call to __kmpc_omp_task_begin_if0() the function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must also be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
auto *UpLoc = emitUpdateLocation(CGF, Loc);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
|
|
|
|
llvm::Value *DepTaskArgs[7];
|
|
|
|
if (NumDependencies) {
|
|
|
|
DepTaskArgs[0] = UpLoc;
|
|
|
|
DepTaskArgs[1] = ThreadID;
|
|
|
|
DepTaskArgs[2] = NewTask;
|
|
|
|
DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
|
|
|
|
DepTaskArgs[4] = DependenciesArray.getPointer();
|
|
|
|
DepTaskArgs[5] = CGF.Builder.getInt32(0);
|
|
|
|
DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
|
|
|
|
}
|
2016-03-28 20:58:34 +08:00
|
|
|
auto &&ThenCodeGen = [this, NumDependencies,
|
|
|
|
&TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
|
|
|
|
// TODO: add check for untied tasks.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
if (NumDependencies) {
|
2016-03-28 20:58:34 +08:00
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
|
|
|
|
DepTaskArgs);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
} else {
|
2016-03-28 20:58:34 +08:00
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
TaskArgs);
|
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
};
|
2016-03-28 20:58:34 +08:00
|
|
|
typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
|
|
|
|
IfCallEndCleanup;
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
|
|
|
|
llvm::Value *DepWaitTaskArgs[6];
|
|
|
|
if (NumDependencies) {
|
|
|
|
DepWaitTaskArgs[0] = UpLoc;
|
|
|
|
DepWaitTaskArgs[1] = ThreadID;
|
|
|
|
DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
|
|
|
|
DepWaitTaskArgs[3] = DependenciesArray.getPointer();
|
|
|
|
DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
|
|
|
|
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
|
|
|
|
}
|
2016-03-28 20:58:34 +08:00
|
|
|
auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
|
|
|
|
NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has an associated 'if' clause, then before the call to __kmpc_omp_task_begin_if0() the function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must also be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
CodeGenFunction::RunCleanupsScope LocalScope(CGF);
|
|
|
|
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
|
|
|
|
// ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
|
|
|
|
// is specified.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
if (NumDependencies)
|
2016-03-28 20:58:34 +08:00
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
DepWaitTaskArgs);
|
|
|
|
// Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_task_t *new_task);
|
2016-03-28 20:58:34 +08:00
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
|
|
|
|
TaskArgs);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_task_t *new_task);
|
2016-03-28 20:58:34 +08:00
|
|
|
CGF.EHStack.pushCleanup<IfCallEndCleanup>(
|
|
|
|
NormalAndEHCleanup,
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
|
|
|
|
llvm::makeArrayRef(TaskArgs));
|
|
|
|
|
|
|
|
// Call proxy_task_entry(gtid, new_task);
|
|
|
|
llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
|
|
|
|
CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
};
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
|
2016-03-28 20:58:34 +08:00
|
|
|
if (IfCond) {
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
|
2016-03-28 20:58:34 +08:00
|
|
|
} else {
|
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
ThenCodeGen(CGF);
|
2016-03-28 20:58:34 +08:00
|
|
|
}
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
}
|
|
|
|
|
2015-10-08 17:10:53 +08:00
|
|
|
/// \brief Emit reduction operation for each element of array (required for
|
|
|
|
/// array sections) LHS op = RHS.
|
|
|
|
/// \param Type Type of array.
|
|
|
|
/// \param LHSVar Variable on the left side of the reduction operation
|
|
|
|
/// (references element of array in original variable).
|
|
|
|
/// \param RHSVar Variable on the right side of the reduction operation
|
|
|
|
/// (references element of array in original variable).
|
|
|
|
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
|
|
|
|
/// RHSVar.
|
2015-10-28 21:54:16 +08:00
|
|
|
static void EmitOMPAggregateReduction(
|
2015-10-08 17:10:53 +08:00
|
|
|
CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
|
|
|
|
const VarDecl *RHSVar,
|
|
|
|
const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
|
|
|
|
const Expr *, const Expr *)> &RedOpGen,
|
|
|
|
const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
|
|
|
|
const Expr *UpExpr = nullptr) {
|
|
|
|
// Perform element-by-element initialization.
|
|
|
|
QualType ElementTy;
|
|
|
|
Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
|
|
|
|
Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
|
|
|
|
|
|
|
|
// Drill down to the base element type on both arrays.
|
|
|
|
auto ArrayTy = Type->getAsArrayTypeUnsafe();
|
|
|
|
auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
|
|
|
|
|
|
|
|
auto RHSBegin = RHSAddr.getPointer();
|
|
|
|
auto LHSBegin = LHSAddr.getPointer();
|
|
|
|
// Cast from pointer to array type to pointer to single element.
|
|
|
|
auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
|
|
|
|
// The basic structure here is a while-do loop.
|
|
|
|
auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
|
|
|
|
auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
|
|
|
|
auto IsEmpty =
|
|
|
|
CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
|
|
|
|
CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
|
|
|
|
|
|
|
|
// Enter the loop body, making that address the current address.
|
|
|
|
auto EntryBB = CGF.Builder.GetInsertBlock();
|
|
|
|
CGF.EmitBlock(BodyBB);
|
|
|
|
|
|
|
|
CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
|
|
|
|
|
|
|
|
llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
|
|
|
|
RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
|
|
|
|
RHSElementPHI->addIncoming(RHSBegin, EntryBB);
|
|
|
|
Address RHSElementCurrent =
|
|
|
|
Address(RHSElementPHI,
|
|
|
|
RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
|
|
|
|
|
|
|
|
llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
|
|
|
|
LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
|
|
|
|
LHSElementPHI->addIncoming(LHSBegin, EntryBB);
|
|
|
|
Address LHSElementCurrent =
|
|
|
|
Address(LHSElementPHI,
|
|
|
|
LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
|
|
|
|
|
|
|
|
// Emit copy.
|
|
|
|
CodeGenFunction::OMPPrivateScope Scope(CGF);
|
|
|
|
Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
|
|
|
|
Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
|
|
|
|
Scope.Privatize();
|
|
|
|
RedOpGen(CGF, XExpr, EExpr, UpExpr);
|
|
|
|
Scope.ForceCleanup();
|
|
|
|
|
|
|
|
// Shift the address forward by one element.
|
|
|
|
auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
|
|
|
|
LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
|
|
|
|
auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
|
|
|
|
RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
|
|
|
|
// Check whether we've reached the end.
|
|
|
|
auto Done =
|
|
|
|
CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
|
|
|
|
CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
|
|
|
|
LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
|
|
|
|
RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
|
|
|
|
|
|
|
|
// Done.
|
|
|
|
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
|
|
|
|
}
|
|
|
|
|
2016-03-17 18:19:46 +08:00
|
|
|
/// Emit reduction combiner. If the combiner is a simple expression emit it as
|
|
|
|
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
|
|
|
|
/// UDR combiner function.
|
|
|
|
static void emitReductionCombiner(CodeGenFunction &CGF,
|
|
|
|
const Expr *ReductionOp) {
|
|
|
|
if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
|
|
|
|
if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
|
|
|
|
if (auto *DRE =
|
|
|
|
dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
|
|
|
|
if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
|
|
|
|
std::pair<llvm::Function *, llvm::Function *> Reduction =
|
|
|
|
CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
|
|
|
|
RValue Func = RValue::get(Reduction.first);
|
|
|
|
CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
|
|
|
|
CGF.EmitIgnoredExpr(ReductionOp);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
CGF.EmitIgnoredExpr(ReductionOp);
|
|
|
|
}
|
|
|
|
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
|
|
|
|
llvm::Type *ArgsType,
|
2015-10-08 17:10:53 +08:00
|
|
|
ArrayRef<const Expr *> Privates,
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
ArrayRef<const Expr *> LHSExprs,
|
|
|
|
ArrayRef<const Expr *> RHSExprs,
|
|
|
|
ArrayRef<const Expr *> ReductionOps) {
|
|
|
|
auto &C = CGM.getContext();
|
|
|
|
|
|
|
|
// void reduction_func(void *LHSArg, void *RHSArg);
|
|
|
|
FunctionArgList Args;
|
|
|
|
ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
|
|
|
|
C.VoidPtrTy);
|
|
|
|
ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
|
|
|
|
C.VoidPtrTy);
|
|
|
|
Args.push_back(&LHSArg);
|
|
|
|
Args.push_back(&RHSArg);
|
2016-03-11 12:30:31 +08:00
|
|
|
auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
auto *Fn = llvm::Function::Create(
|
|
|
|
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
|
|
|
|
".omp.reduction.reduction_func", &CGM.getModule());
|
2015-10-28 10:30:47 +08:00
|
|
|
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
|
|
|
|
|
|
|
|
// Dst = (void*[n])(LHSArg);
|
|
|
|
// Src = (void*[n])(RHSArg);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
|
|
|
|
ArgsType), CGF.getPointerAlign());
|
|
|
|
Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
|
|
|
|
ArgsType), CGF.getPointerAlign());
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
|
|
|
|
// ...
|
|
|
|
// *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
|
|
|
|
// ...
|
|
|
|
CodeGenFunction::OMPPrivateScope Scope(CGF);
|
2015-10-08 17:10:53 +08:00
|
|
|
auto IPriv = Privates.begin();
|
|
|
|
unsigned Idx = 0;
|
|
|
|
for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
|
|
|
|
Scope.addPrivate(RHSVar, [&]() -> Address {
|
2015-10-08 17:10:53 +08:00
|
|
|
return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
});
|
|
|
|
auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
|
|
|
|
Scope.addPrivate(LHSVar, [&]() -> Address {
|
2015-10-08 17:10:53 +08:00
|
|
|
return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
});
|
2015-10-08 17:10:53 +08:00
|
|
|
QualType PrivTy = (*IPriv)->getType();
|
2016-01-26 20:20:39 +08:00
|
|
|
if (PrivTy->isVariablyModifiedType()) {
|
2015-10-08 17:10:53 +08:00
|
|
|
// Get array size and emit VLA type.
|
|
|
|
++Idx;
|
|
|
|
Address Elem =
|
|
|
|
CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
|
|
|
|
llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
|
2016-01-26 20:20:39 +08:00
|
|
|
auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
|
|
|
|
auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
|
2015-10-08 17:10:53 +08:00
|
|
|
CodeGenFunction::OpaqueValueMapping OpaqueMap(
|
2016-01-26 20:20:39 +08:00
|
|
|
CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
|
2015-10-08 17:10:53 +08:00
|
|
|
CGF.EmitVariablyModifiedType(PrivTy);
|
|
|
|
}
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
|
|
|
Scope.Privatize();
|
2015-10-08 17:10:53 +08:00
|
|
|
IPriv = Privates.begin();
|
|
|
|
auto ILHS = LHSExprs.begin();
|
|
|
|
auto IRHS = RHSExprs.begin();
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
for (auto *E : ReductionOps) {
|
2015-10-08 17:10:53 +08:00
|
|
|
if ((*IPriv)->getType()->isArrayType()) {
|
|
|
|
// Emit reduction for array section.
|
|
|
|
auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
|
|
|
|
auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
|
2016-03-17 18:19:46 +08:00
|
|
|
EmitOMPAggregateReduction(
|
|
|
|
CGF, (*IPriv)->getType(), LHSVar, RHSVar,
|
|
|
|
[=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
|
|
|
|
emitReductionCombiner(CGF, E);
|
|
|
|
});
|
2015-10-08 17:10:53 +08:00
|
|
|
} else
|
|
|
|
// Emit reduction for array subscript or single variable.
|
2016-03-17 18:19:46 +08:00
|
|
|
emitReductionCombiner(CGF, E);
|
2016-02-19 06:34:54 +08:00
|
|
|
++IPriv;
|
|
|
|
++ILHS;
|
|
|
|
++IRHS;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
|
|
|
Scope.ForceCleanup();
|
|
|
|
CGF.FinishFunction();
|
|
|
|
return Fn;
|
|
|
|
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
|
2015-10-08 17:10:53 +08:00
|
|
|
ArrayRef<const Expr *> Privates,
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
ArrayRef<const Expr *> LHSExprs,
|
|
|
|
ArrayRef<const Expr *> RHSExprs,
|
|
|
|
ArrayRef<const Expr *> ReductionOps,
|
2015-06-17 14:21:39 +08:00
|
|
|
bool WithNowait, bool SimpleReduction) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
// Next code should be emitted for reduction:
|
|
|
|
//
|
|
|
|
// static kmp_critical_name lock = { 0 };
|
|
|
|
//
|
|
|
|
// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
|
|
|
|
// *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
|
|
|
|
// ...
|
|
|
|
// *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
|
|
|
|
// *(Type<n>-1*)rhs[<n>-1]);
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// ...
|
|
|
|
// void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
|
|
|
|
// switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
|
|
|
|
// RedList, reduce_func, &<lock>)) {
|
|
|
|
// case 1:
|
|
|
|
// ...
|
|
|
|
// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
|
|
|
|
// ...
|
|
|
|
// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
|
|
|
|
// break;
|
|
|
|
// case 2:
|
|
|
|
// ...
|
|
|
|
// Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
|
|
|
|
// ...
|
2015-05-07 11:54:03 +08:00
|
|
|
// [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
// break;
|
|
|
|
// default:;
|
|
|
|
// }
|
2015-06-17 14:21:39 +08:00
|
|
|
//
|
|
|
|
// if SimpleReduction is true, only the next code is generated:
|
|
|
|
// ...
|
|
|
|
// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
|
|
|
|
// ...
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
|
|
|
|
auto &C = CGM.getContext();
|
|
|
|
|
2015-06-17 14:21:39 +08:00
|
|
|
if (SimpleReduction) {
|
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
2015-10-08 17:10:53 +08:00
|
|
|
auto IPriv = Privates.begin();
|
|
|
|
auto ILHS = LHSExprs.begin();
|
|
|
|
auto IRHS = RHSExprs.begin();
|
2015-06-17 14:21:39 +08:00
|
|
|
for (auto *E : ReductionOps) {
|
2016-03-28 20:58:34 +08:00
|
|
|
if ((*IPriv)->getType()->isArrayType()) {
|
|
|
|
auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
|
|
|
|
auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
|
|
|
|
EmitOMPAggregateReduction(
|
|
|
|
CGF, (*IPriv)->getType(), LHSVar, RHSVar,
|
|
|
|
[=](CodeGenFunction &CGF, const Expr *, const Expr *,
|
|
|
|
const Expr *) { emitReductionCombiner(CGF, E); });
|
|
|
|
} else
|
|
|
|
emitReductionCombiner(CGF, E);
|
2016-02-19 06:34:54 +08:00
|
|
|
++IPriv;
|
|
|
|
++ILHS;
|
|
|
|
++IRHS;
|
2015-06-17 14:21:39 +08:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
// 1. Build a list of reduction variables.
|
|
|
|
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
|
2015-10-08 17:10:53 +08:00
|
|
|
auto Size = RHSExprs.size();
|
|
|
|
for (auto *E : Privates) {
|
2016-01-26 20:20:39 +08:00
|
|
|
if (E->getType()->isVariablyModifiedType())
|
2015-10-08 17:10:53 +08:00
|
|
|
// Reserve place for array size.
|
|
|
|
++Size;
|
|
|
|
}
|
|
|
|
llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
QualType ReductionArrayTy =
|
|
|
|
C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
|
|
|
|
/*IndexTypeQuals=*/0);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address ReductionList =
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
|
2015-10-08 17:10:53 +08:00
|
|
|
auto IPriv = Privates.begin();
|
|
|
|
unsigned Idx = 0;
|
|
|
|
for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address Elem =
|
2015-10-08 17:10:53 +08:00
|
|
|
CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.Builder.CreateStore(
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
|
|
|
|
Elem);
|
2016-01-26 20:20:39 +08:00
|
|
|
if ((*IPriv)->getType()->isVariablyModifiedType()) {
|
2015-10-08 17:10:53 +08:00
|
|
|
// Store array size.
|
|
|
|
++Idx;
|
|
|
|
Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
|
|
|
|
CGF.getPointerSize());
|
2016-01-26 20:20:39 +08:00
|
|
|
llvm::Value *Size = CGF.Builder.CreateIntCast(
|
|
|
|
CGF.getVLASize(
|
|
|
|
CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
|
|
|
|
.first,
|
|
|
|
CGF.SizeTy, /*isSigned=*/false);
|
|
|
|
CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
|
|
|
|
Elem);
|
2015-10-08 17:10:53 +08:00
|
|
|
}
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// 2. Emit reduce_func().
|
|
|
|
auto *ReductionFn = emitReductionFunction(
|
2015-10-08 17:10:53 +08:00
|
|
|
CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
|
|
|
|
LHSExprs, RHSExprs, ReductionOps);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
|
|
|
|
// 3. Create static kmp_critical_name lock = { 0 };
|
|
|
|
auto *Lock = getCriticalRegionLock(".reduction");
|
|
|
|
|
|
|
|
// 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
|
|
|
|
// RedList, reduce_func, &<lock>);
|
2016-02-19 18:38:26 +08:00
|
|
|
auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
auto *ThreadId = getThreadID(CGF, Loc);
|
2016-01-26 20:20:39 +08:00
|
|
|
auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto *RL =
|
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
|
|
|
|
CGF.VoidPtrTy);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
llvm::Value *Args[] = {
|
|
|
|
IdentTLoc, // ident_t *<loc>
|
|
|
|
ThreadId, // i32 <gtid>
|
|
|
|
CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
|
|
|
|
ReductionArrayTySize, // size_type sizeof(RedList)
|
|
|
|
RL, // void *RedList
|
|
|
|
ReductionFn, // void (*) (void *, void *) <reduce_func>
|
|
|
|
Lock // kmp_critical_name *&<lock>
|
|
|
|
};
|
|
|
|
auto Res = CGF.EmitRuntimeCall(
|
|
|
|
createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
|
|
|
|
: OMPRTL__kmpc_reduce),
|
|
|
|
Args);
|
|
|
|
|
|
|
|
// 5. Build switch(res)
|
|
|
|
auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
|
|
|
|
auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
|
|
|
|
|
|
|
|
// 6. Build case 1:
|
|
|
|
// ...
|
|
|
|
// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
|
|
|
|
// ...
|
|
|
|
// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
|
|
|
|
// break;
|
|
|
|
auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
|
|
|
|
SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
|
|
|
|
CGF.EmitBlock(Case1BB);
|
|
|
|
|
2016-03-28 20:58:34 +08:00
|
|
|
{
|
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
|
|
|
// Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
|
|
|
|
llvm::Value *EndArgs[] = {
|
|
|
|
IdentTLoc, // ident_t *<loc>
|
|
|
|
ThreadId, // i32 <gtid>
|
|
|
|
Lock // kmp_critical_name *&<lock>
|
|
|
|
};
|
|
|
|
CGF.EHStack
|
|
|
|
.pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
|
|
|
|
NormalAndEHCleanup,
|
|
|
|
createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
|
|
|
|
: OMPRTL__kmpc_end_reduce),
|
|
|
|
llvm::makeArrayRef(EndArgs));
|
2015-10-08 17:10:53 +08:00
|
|
|
auto IPriv = Privates.begin();
|
|
|
|
auto ILHS = LHSExprs.begin();
|
|
|
|
auto IRHS = RHSExprs.begin();
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
for (auto *E : ReductionOps) {
|
2016-03-28 20:58:34 +08:00
|
|
|
if ((*IPriv)->getType()->isArrayType()) {
|
|
|
|
// Emit reduction for array section.
|
|
|
|
auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
|
|
|
|
auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
|
|
|
|
EmitOMPAggregateReduction(
|
|
|
|
CGF, (*IPriv)->getType(), LHSVar, RHSVar,
|
|
|
|
[=](CodeGenFunction &CGF, const Expr *, const Expr *,
|
|
|
|
const Expr *) { emitReductionCombiner(CGF, E); });
|
|
|
|
} else
|
|
|
|
// Emit reduction for array subscript or single variable.
|
|
|
|
emitReductionCombiner(CGF, E);
|
2016-02-19 06:34:54 +08:00
|
|
|
++IPriv;
|
|
|
|
++ILHS;
|
|
|
|
++IRHS;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
2016-03-28 20:58:34 +08:00
|
|
|
}
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
|
|
|
|
CGF.EmitBranch(DefaultBB);
|
|
|
|
|
|
|
|
// 7. Build case 2:
|
|
|
|
// ...
|
|
|
|
// Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
|
|
|
|
// ...
|
|
|
|
// break;
|
|
|
|
auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
|
|
|
|
SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
|
|
|
|
CGF.EmitBlock(Case2BB);
|
|
|
|
|
2016-03-28 20:58:34 +08:00
|
|
|
{
|
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
|
|
|
if (!WithNowait) {
|
|
|
|
// Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
|
|
|
|
llvm::Value *EndArgs[] = {
|
|
|
|
IdentTLoc, // ident_t *<loc>
|
|
|
|
ThreadId, // i32 <gtid>
|
|
|
|
Lock // kmp_critical_name *&<lock>
|
|
|
|
};
|
|
|
|
CGF.EHStack
|
|
|
|
.pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
|
|
|
|
NormalAndEHCleanup,
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_end_reduce),
|
|
|
|
llvm::makeArrayRef(EndArgs));
|
|
|
|
}
|
2015-10-08 17:10:53 +08:00
|
|
|
auto ILHS = LHSExprs.begin();
|
|
|
|
auto IRHS = RHSExprs.begin();
|
|
|
|
auto IPriv = Privates.begin();
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
for (auto *E : ReductionOps) {
|
2016-03-28 20:58:34 +08:00
|
|
|
const Expr *XExpr = nullptr;
|
|
|
|
const Expr *EExpr = nullptr;
|
|
|
|
const Expr *UpExpr = nullptr;
|
|
|
|
BinaryOperatorKind BO = BO_Comma;
|
|
|
|
if (auto *BO = dyn_cast<BinaryOperator>(E)) {
|
|
|
|
if (BO->getOpcode() == BO_Assign) {
|
|
|
|
XExpr = BO->getLHS();
|
|
|
|
UpExpr = BO->getRHS();
|
|
|
|
}
|
2015-05-07 11:54:03 +08:00
|
|
|
}
|
2016-03-28 20:58:34 +08:00
|
|
|
// Try to emit update expression as a simple atomic.
|
|
|
|
auto *RHSExpr = UpExpr;
|
|
|
|
if (RHSExpr) {
|
|
|
|
// Analyze RHS part of the whole expression.
|
|
|
|
if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
|
|
|
|
RHSExpr->IgnoreParenImpCasts())) {
|
|
|
|
// If this is a conditional operator, analyze its condition for
|
|
|
|
// min/max reduction operator.
|
|
|
|
RHSExpr = ACO->getCond();
|
|
|
|
}
|
|
|
|
if (auto *BORHS =
|
|
|
|
dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
|
|
|
|
EExpr = BORHS->getRHS();
|
|
|
|
BO = BORHS->getOpcode();
|
|
|
|
}
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
2016-03-28 20:58:34 +08:00
|
|
|
if (XExpr) {
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
|
|
|
|
auto &&AtomicRedGen = [this, BO, VD, IPriv,
|
|
|
|
Loc](CodeGenFunction &CGF, const Expr *XExpr,
|
|
|
|
const Expr *EExpr, const Expr *UpExpr) {
|
|
|
|
LValue X = CGF.EmitLValue(XExpr);
|
|
|
|
RValue E;
|
|
|
|
if (EExpr)
|
|
|
|
E = CGF.EmitAnyExpr(EExpr);
|
|
|
|
CGF.EmitOMPAtomicSimpleUpdateExpr(
|
|
|
|
X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
|
|
|
|
[&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
|
|
|
|
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
|
|
|
|
PrivateScope.addPrivate(
|
|
|
|
VD, [&CGF, VD, XRValue, Loc]() -> Address {
|
|
|
|
Address LHSTemp = CGF.CreateMemTemp(VD->getType());
|
|
|
|
CGF.emitOMPSimpleStore(
|
|
|
|
CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
|
|
|
|
VD->getType().getNonReferenceType(), Loc);
|
|
|
|
return LHSTemp;
|
|
|
|
});
|
|
|
|
(void)PrivateScope.Privatize();
|
|
|
|
return CGF.EmitAnyExpr(UpExpr);
|
|
|
|
});
|
|
|
|
};
|
|
|
|
if ((*IPriv)->getType()->isArrayType()) {
|
|
|
|
// Emit atomic reduction for array section.
|
|
|
|
auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
|
|
|
|
EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
|
|
|
|
AtomicRedGen, XExpr, EExpr, UpExpr);
|
|
|
|
} else
|
|
|
|
// Emit atomic reduction for array subscript or single variable.
|
|
|
|
AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
|
|
|
|
} else {
|
|
|
|
// Emit as a critical region.
|
|
|
|
auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *,
|
|
|
|
const Expr *, const Expr *) {
|
|
|
|
emitCriticalRegion(
|
|
|
|
CGF, ".atomic_reduction",
|
|
|
|
[=](CodeGenFunction &CGF) { emitReductionCombiner(CGF, E); },
|
|
|
|
Loc);
|
|
|
|
};
|
|
|
|
if ((*IPriv)->getType()->isArrayType()) {
|
|
|
|
auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
|
|
|
|
auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
|
|
|
|
EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
|
|
|
|
CritRedGen);
|
|
|
|
} else
|
|
|
|
CritRedGen(CGF, nullptr, nullptr, nullptr);
|
2015-10-08 17:10:53 +08:00
|
|
|
}
|
2016-02-19 06:34:54 +08:00
|
|
|
++ILHS;
|
|
|
|
++IRHS;
|
|
|
|
++IPriv;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
2016-03-28 20:58:34 +08:00
|
|
|
}
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
|
|
|
|
CGF.EmitBranch(DefaultBB);
|
|
|
|
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
|
|
|
|
}
|
|
|
|
|
2015-04-27 13:22:09 +08:00
|
|
|
/// \brief Emit a call to the __kmpc_omp_taskwait runtime entry point.
///
/// \param CGF Function being generated; nothing is emitted when it has no
///            insertion point.
/// \param Loc Source location used to build the ident_t argument and to look
///            up the thread id.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
|
|
|
|
SourceLocation Loc) {
|
2015-12-18 15:58:25 +08:00
|
|
|
// No insertion point means there is nowhere to place the call.
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-04-27 13:22:09 +08:00
|
|
|
// Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
|
|
|
|
// global_tid);
|
|
|
|
// Args[0] is the location descriptor, Args[1] is the thread id.
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
|
|
|
|
// Ignore return result until untied tasks are supported.
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
|
|
|
|
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
/// \brief Emit the body of an OpenMP directive in place, inside the current
/// function, by running \p CodeGen through an inlined-region scope.
///
/// \param CGF       Function being generated; nothing is emitted when it has
///                  no insertion point.
/// \param InnerKind Directive kind forwarded to the region scope.
/// \param CodeGen   Generator forwarded to the region scope.
/// \param HasCancel Cancel flag forwarded to the region scope.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
|
2015-07-03 17:56:58 +08:00
|
|
|
OpenMPDirectiveKind InnerKind,
|
2015-09-15 20:52:43 +08:00
|
|
|
const RegionCodeGenTy &CodeGen,
|
|
|
|
bool HasCancel) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-09-15 20:52:43 +08:00
|
|
|
// Scope object for the duration of the body emission.
// NOTE(review): presumably it installs the region info that is reached
// through CGF.CapturedStmtInfo below and restores the previous one when it
// goes out of scope; its definition is not visible here - confirm.
InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
|
2015-04-10 12:50:10 +08:00
|
|
|
// The body is emitted with a null statement argument.
CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
|
2015-02-26 18:27:34 +08:00
|
|
|
}
|
|
|
|
|
2015-07-06 13:50:32 +08:00
|
|
|
namespace {
|
|
|
|
// Integer codes for the kind of region being cancelled. In this file the
// value returned by getCancellationKind is passed as the cncl_kind argument
// of the __kmpc_cancel and __kmpc_cancellationpoint calls.
enum RTCancelKind {
|
|
|
|
// Initial value used by getCancellationKind before a kind is selected.
CancelNoreq = 0,
|
|
|
|
// Selected for OMPD_parallel.
CancelParallel = 1,
|
|
|
|
// Selected for OMPD_for.
CancelLoop = 2,
|
|
|
|
// Selected for OMPD_sections.
CancelSections = 3,
|
|
|
|
// Selected for OMPD_taskgroup.
CancelTaskgroup = 4
|
|
|
|
};
|
2016-02-11 03:11:58 +08:00
|
|
|
} // anonymous namespace
|
2015-07-06 13:50:32 +08:00
|
|
|
|
|
|
|
/// \brief Map \p CancelRegion to the matching RTCancelKind value.
///
/// OMPD_parallel, OMPD_for and OMPD_sections are matched explicitly. Any other
/// kind is asserted to be OMPD_taskgroup; with assertions compiled out, any
/// other kind therefore also yields CancelTaskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
|
|
|
|
// Every branch below overwrites this initial value.
RTCancelKind CancelKind = CancelNoreq;
|
2015-07-02 12:17:07 +08:00
|
|
|
if (CancelRegion == OMPD_parallel)
|
|
|
|
CancelKind = CancelParallel;
|
|
|
|
else if (CancelRegion == OMPD_for)
|
|
|
|
CancelKind = CancelLoop;
|
|
|
|
else if (CancelRegion == OMPD_sections)
|
|
|
|
CancelKind = CancelSections;
|
|
|
|
else {
|
|
|
|
// Only taskgroup is expected to reach this branch.
assert(CancelRegion == OMPD_taskgroup);
|
|
|
|
CancelKind = CancelTaskgroup;
|
|
|
|
}
|
2015-07-06 13:50:32 +08:00
|
|
|
return CancelKind;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Emit a __kmpc_cancellationpoint call followed by a branch on its
/// result.
///
/// Nothing is emitted unless \p CGF has an insertion point, its
/// CapturedStmtInfo is a CGOpenMPRegionInfo, and that region reports
/// hasCancel().
///
/// \param CGF          Function being generated.
/// \param Loc          Source location for the runtime calls.
/// \param CancelRegion Directive kind translated by getCancellationKind into
///                     the cncl_kind argument.
void CGOpenMPRuntime::emitCancellationPointCall(
|
|
|
|
CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
|
OpenMPDirectiveKind CancelRegion) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-07-06 13:50:32 +08:00
|
|
|
// Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
|
|
|
|
// global_tid, kmp_int32 cncl_kind);
|
2015-07-03 17:56:58 +08:00
|
|
|
// dyn_cast_or_null: CapturedStmtInfo may be null or of another kind, in
// which case nothing is emitted.
if (auto *OMPRegionInfo =
|
|
|
|
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
|
2015-09-15 20:52:43 +08:00
|
|
|
if (OMPRegionInfo->hasCancel()) {
|
2015-07-06 13:50:32 +08:00
|
|
|
llvm::Value *Args[] = {
|
|
|
|
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
|
|
|
|
CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
|
2015-07-03 17:56:58 +08:00
|
|
|
// The result is used: it is tested for non-zero below to decide whether
// to leave the construct.
|
|
|
|
auto *Result = CGF.EmitRuntimeCall(
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
|
|
|
|
// if (__kmpc_cancellationpoint()) {
|
|
|
|
// __kmpc_cancel_barrier();
|
|
|
|
// exit from construct;
|
|
|
|
// }
|
|
|
|
auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
|
|
|
|
auto *ContBB = CGF.createBasicBlock(".cancel.continue");
|
|
|
|
// Non-zero result goes to ExitBB, zero result goes to ContBB.
auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
|
|
|
|
CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
|
|
|
|
CGF.EmitBlock(ExitBB);
|
|
|
|
// __kmpc_cancel_barrier();
|
2015-09-15 20:52:43 +08:00
|
|
|
emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
|
2015-07-03 17:56:58 +08:00
|
|
|
// exit from construct;
|
2015-09-15 20:52:43 +08:00
|
|
|
// The destination is looked up by the directive kind of the enclosing
// region, not by CancelRegion.
auto CancelDest =
|
|
|
|
CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
|
2015-07-03 17:56:58 +08:00
|
|
|
CGF.EmitBranchThroughCleanup(CancelDest);
|
|
|
|
// Code emitted after this function continues in ContBB.
CGF.EmitBlock(ContBB, /*IsFinished=*/true);
|
|
|
|
}
|
2015-07-02 12:17:07 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-06 13:50:32 +08:00
|
|
|
/// \brief Emit a __kmpc_cancel call followed by a branch on its result,
/// optionally guarded by an if-clause condition.
///
/// Nothing is emitted unless \p CGF has an insertion point and its
/// CapturedStmtInfo is a CGOpenMPRegionInfo.
///
/// \param CGF          Function being generated.
/// \param Loc          Source location for the runtime calls.
/// \param IfCond       Optional condition. When non-null the sequence is
///                     emitted through emitOMPIfClause with an empty else
///                     generator; when null it is emitted unconditionally.
/// \param CancelRegion Directive kind translated by getCancellationKind into
///                     the cncl_kind argument.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
|
2015-09-18 16:07:34 +08:00
|
|
|
const Expr *IfCond,
|
2015-07-06 13:50:32 +08:00
|
|
|
OpenMPDirectiveKind CancelRegion) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-07-06 13:50:32 +08:00
|
|
|
// Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_int32 cncl_kind);
|
|
|
|
if (auto *OMPRegionInfo =
|
|
|
|
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
|
2016-03-28 20:58:34 +08:00
|
|
|
// Generator for the cancel sequence. All captures are by value; it is only
// invoked within this function, either directly or via emitOMPIfClause.
auto &&ThenGen = [this, Loc, CancelRegion,
|
|
|
|
OMPRegionInfo](CodeGenFunction &CGF) {
|
2015-09-18 16:07:34 +08:00
|
|
|
llvm::Value *Args[] = {
|
2016-03-28 20:58:34 +08:00
|
|
|
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
|
2015-09-18 16:07:34 +08:00
|
|
|
CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
|
|
|
|
// The result is used: it is tested for non-zero below to decide whether
// to leave the construct.
|
2016-03-28 20:58:34 +08:00
|
|
|
auto *Result =
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
|
2015-09-18 16:07:34 +08:00
|
|
|
// if (__kmpc_cancel()) {
|
|
|
|
// __kmpc_cancel_barrier();
|
|
|
|
// exit from construct;
|
|
|
|
// }
|
|
|
|
auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
|
|
|
|
auto *ContBB = CGF.createBasicBlock(".cancel.continue");
|
|
|
|
// Non-zero result goes to ExitBB, zero result goes to ContBB.
auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
|
|
|
|
CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
|
|
|
|
CGF.EmitBlock(ExitBB);
|
|
|
|
// __kmpc_cancel_barrier();
|
2016-03-28 20:58:34 +08:00
|
|
|
emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
|
2015-09-18 16:07:34 +08:00
|
|
|
// exit from construct;
|
|
|
|
// The destination is looked up by the directive kind of the enclosing
// region, not by CancelRegion.
auto CancelDest =
|
|
|
|
CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
|
|
|
|
CGF.EmitBranchThroughCleanup(CancelDest);
|
|
|
|
CGF.EmitBlock(ContBB, /*IsFinished=*/true);
|
|
|
|
};
|
|
|
|
// With a condition, the else generator is an empty lambda.
if (IfCond)
|
2016-03-28 20:58:34 +08:00
|
|
|
emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
|
2015-09-18 16:07:34 +08:00
|
|
|
else
|
|
|
|
ThenGen(CGF);
|
2015-07-06 13:50:32 +08:00
|
|
|
}
|
|
|
|
}
|
2015-10-03 00:14:20 +08:00
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
/// \brief Obtain information that uniquely identifies a target entry. This
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to be visible (external linkage) for the device side. Using dots in the names of the entries can therefore be problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignores unknown tokens, preventing several target regions from being implemented on the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
/// consists of the file and device IDs as well as line number associated with
|
|
|
|
/// the relevant entry source location.
|
2016-01-06 21:42:12 +08:00
|
|
|
// Outputs: DeviceID and FileID are taken from the llvm::sys::fs::UniqueID of
// the presumed file name of Loc; LineNum is the presumed line of Loc.
// Preconditions (asserted): Loc is valid, Loc is a file location, and the
// presumed location computed from it is valid.
// NOTE(review): a failing getUniqueID call reaches llvm_unreachable. Whether
// that lookup can fail for real inputs is not visible from this function -
// confirm, and consider reporting a diagnostic instead.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
|
|
|
|
unsigned &DeviceID, unsigned &FileID,
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
unsigned &LineNum) {
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
|
|
|
auto &SM = C.getSourceManager();
|
|
|
|
|
|
|
|
|
// The loc should be always valid and have a file ID (the user cannot use
|
|
|
|
// #pragma directives in macros)
|
|
|
|
|
|
|
|
|
assert(Loc.isValid() && "Source location is expected to be always valid.");
|
|
|
|
assert(Loc.isFileID() && "Source location is expected to refer to a file.");
|
|
|
|
|
|
|
|
|
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
|
|
|
|
assert(PLoc.isValid() && "Source location is expected to be always valid.");
|
|
|
|
|
|
|
|
|
llvm::sys::fs::UniqueID ID;
|
|
|
|
if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
|
|
|
|
llvm_unreachable("Source file with target region no longer exists!");
|
|
|
|
|
|
|
|
|
DeviceID = ID.getDevice();
|
|
|
|
FileID = ID.getFile();
|
|
|
|
LineNum = PLoc.getLine();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Build the body generator for a target region and forward it, with
/// all other arguments unchanged, to emitTargetOutlinedFunctionHelper.
///
/// \param D              Directive whose associated statement must be a
///                       CapturedStmt (enforced by cast).
/// \param ParentName     Must be non-empty (asserted).
/// \param OutlinedFn     Passed through by reference to the helper.
/// \param OutlinedFnID   Passed through by reference to the helper.
/// \param IsOffloadEntry Passed through to the helper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
|
|
|
|
const OMPExecutableDirective &D, StringRef ParentName,
|
|
|
|
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
|
2016-03-28 20:58:34 +08:00
|
|
|
bool IsOffloadEntry) {
|
2016-01-06 21:42:12 +08:00
|
|
|
assert(!ParentName.empty() && "Invalid target region parent name!");
|
|
|
|
|
|
2016-03-28 20:58:34 +08:00
|
|
|
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
|
|
|
|
|
|
|
|
|
// Emit target region as a standalone region.
|
|
|
|
// The generator captures CS and D by reference.
// NOTE(review): presumably the helper only runs it during the call below, so
// the references stay valid; the helper body is not fully visible - confirm.
auto &&CodeGen = [&CS, &D](CodeGenFunction &CGF) {
|
|
|
|
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
|
|
|
|
// Return value deliberately discarded.
(void)CGF.EmitOMPFirstprivateClause(D, PrivateScope);
|
|
|
|
CGF.EmitOMPPrivateClause(D, PrivateScope);
|
|
|
|
(void)PrivateScope.Privatize();
|
|
|
|
|
|
|
|
|
// The region body is emitted after the private scope has been privatized.
CGF.EmitStmt(CS.getCapturedStmt());
|
|
|
|
};
|
|
|
|
|
|
2016-03-22 09:48:56 +08:00
|
|
|
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
|
|
|
|
IsOffloadEntry, CodeGen);
|
|
|
|
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
|
|
|
|
const OMPExecutableDirective &D, StringRef ParentName,
|
|
|
|
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
|
|
|
|
bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
// Create a unique name for the entry function using the source location
|
|
|
|
// information of the current target region. The name will be something like:
|
2016-01-06 21:42:12 +08:00
|
|
|
//
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
// __omp_offloading_DD_FFFF_PP_lBB
|
2016-01-06 21:42:12 +08:00
|
|
|
//
|
|
|
|
// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
// mangled name of the function that encloses the target region and BB is the
|
|
|
|
// line number of the target region.
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
unsigned DeviceID;
|
|
|
|
unsigned FileID;
|
|
|
|
unsigned Line;
|
|
|
|
getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
Line);
|
2016-01-06 21:42:12 +08:00
|
|
|
SmallString<64> EntryFnName;
|
|
|
|
{
|
|
|
|
llvm::raw_svector_ostream OS(EntryFnName);
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
|
|
|
|
<< llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
|
2016-01-06 21:42:12 +08:00
|
|
|
}
|
|
|
|
|
2016-03-22 09:48:56 +08:00
|
|
|
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
|
|
|
|
|
2015-10-03 00:14:20 +08:00
|
|
|
CodeGenFunction CGF(CGM, true);
|
2016-01-06 21:42:12 +08:00
|
|
|
CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
|
2015-10-03 00:14:20 +08:00
|
|
|
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
|
|
|
|
|
|
|
|
// If this target outline function is not an offload entry, we don't need to
|
|
|
|
// register it.
|
|
|
|
if (!IsOffloadEntry)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// The target region ID is used by the runtime library to identify the current
|
|
|
|
// target region, so it only has to be unique and not necessarily point to
|
|
|
|
// anything. It could be the pointer to the outlined function that implements
|
|
|
|
// the target region, but we aren't using that so that the compiler doesn't
|
|
|
|
// need to keep that, and could therefore inline the host function if proven
|
|
|
|
// worthwhile during optimization. In the other hand, if emitting code for the
|
|
|
|
// device, the ID has to be the function address so that it can retrieved from
|
|
|
|
// the offloading entry and launched by the runtime library. We also mark the
|
|
|
|
// outlined function to have external linkage in case we are emitting code for
|
|
|
|
// the device, because these functions will be entry points to the device.
|
|
|
|
|
|
|
|
if (CGM.getLangOpts().OpenMPIsDevice) {
|
|
|
|
OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
|
|
|
|
OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
|
|
|
|
} else
|
|
|
|
OutlinedFnID = new llvm::GlobalVariable(
|
|
|
|
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
|
|
|
|
llvm::GlobalValue::PrivateLinkage,
|
|
|
|
llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
|
|
|
|
|
|
|
|
// Register the information for the entry associated with this target region.
|
|
|
|
OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
|
2015-10-03 00:14:20 +08:00
|
|
|
}
|
|
|
|
|
2016-03-04 00:20:23 +08:00
|
|
|
/// \brief Emit the num_teams clause of an enclosed teams directive at the
|
|
|
|
/// target region scope. If there is no teams directive associated with the
|
|
|
|
/// target directive, or if there is no num_teams clause associated with the
|
|
|
|
/// enclosed teams directive, return nullptr.
|
|
|
|
static llvm::Value *
|
|
|
|
emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
|
|
|
|
CodeGenFunction &CGF,
|
|
|
|
const OMPExecutableDirective &D) {
|
|
|
|
|
|
|
|
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
|
|
|
|
"teams directive expected to be "
|
|
|
|
"emitted only for the host!");
|
|
|
|
|
|
|
|
// FIXME: For the moment we do not support combined directives with target and
|
|
|
|
// teams, so we do not expect to get any num_teams clause in the provided
|
|
|
|
// directive. Once we support that, this assertion can be replaced by the
|
|
|
|
// actual emission of the clause expression.
|
|
|
|
assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
|
|
|
|
"Not expecting clause in directive.");
|
|
|
|
|
|
|
|
// If the current target region has a teams region enclosed, we need to get
|
|
|
|
// the number of teams to pass to the runtime function call. This is done
|
|
|
|
// by generating the expression in a inlined region. This is required because
|
|
|
|
// the expression is captured in the enclosing target environment when the
|
|
|
|
// teams directive is not combined with target.
|
|
|
|
|
|
|
|
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
|
|
|
|
|
|
|
|
// FIXME: Accommodate other combined directives with teams when they become
|
|
|
|
// available.
|
|
|
|
if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
|
|
|
|
if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
|
|
|
|
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
|
|
|
|
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
|
|
|
|
llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
|
|
|
|
return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
|
|
|
|
/*IsSigned=*/true);
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we have an enclosed teams directive but no num_teams clause we use
|
|
|
|
// the default value 0.
|
|
|
|
return CGF.Builder.getInt32(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// No teams associated with the directive.
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Emit the thread_limit clause of an enclosed teams directive at the
|
|
|
|
/// target region scope. If there is no teams directive associated with the
|
|
|
|
/// target directive, or if there is no thread_limit clause associated with the
|
|
|
|
/// enclosed teams directive, return nullptr.
|
|
|
|
static llvm::Value *
|
|
|
|
emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
|
|
|
|
CodeGenFunction &CGF,
|
|
|
|
const OMPExecutableDirective &D) {
|
|
|
|
|
|
|
|
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
|
|
|
|
"teams directive expected to be "
|
|
|
|
"emitted only for the host!");
|
|
|
|
|
|
|
|
// FIXME: For the moment we do not support combined directives with target and
|
|
|
|
// teams, so we do not expect to get any thread_limit clause in the provided
|
|
|
|
// directive. Once we support that, this assertion can be replaced by the
|
|
|
|
// actual emission of the clause expression.
|
|
|
|
assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
|
|
|
|
"Not expecting clause in directive.");
|
|
|
|
|
|
|
|
// If the current target region has a teams region enclosed, we need to get
|
|
|
|
// the thread limit to pass to the runtime function call. This is done
|
|
|
|
// by generating the expression in a inlined region. This is required because
|
|
|
|
// the expression is captured in the enclosing target environment when the
|
|
|
|
// teams directive is not combined with target.
|
|
|
|
|
|
|
|
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
|
|
|
|
|
|
|
|
// FIXME: Accommodate other combined directives with teams when they become
|
|
|
|
// available.
|
|
|
|
if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
|
|
|
|
if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
|
|
|
|
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
|
|
|
|
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
|
|
|
|
llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
|
|
|
|
return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
|
|
|
|
/*IsSigned=*/true);
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we have an enclosed teams directive but no thread_limit clause we use
|
|
|
|
// the default value 0.
|
|
|
|
return CGF.Builder.getInt32(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// No teams associated with the directive.
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2015-10-03 00:14:20 +08:00
|
|
|
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
|
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
llvm::Value *OutlinedFn,
|
2016-01-06 21:42:12 +08:00
|
|
|
llvm::Value *OutlinedFnID,
|
2015-10-03 00:14:20 +08:00
|
|
|
const Expr *IfCond, const Expr *Device,
|
|
|
|
ArrayRef<llvm::Value *> CapturedVars) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-10-03 00:14:20 +08:00
|
|
|
/// \brief Values for bit flags used to specify the mapping type for
|
|
|
|
/// offloading.
|
|
|
|
enum OpenMPOffloadMappingFlags {
|
|
|
|
/// \brief Allocate memory on the device and move data from host to device.
|
|
|
|
OMP_MAP_TO = 0x01,
|
|
|
|
/// \brief Allocate memory on the device and move data from device to host.
|
|
|
|
OMP_MAP_FROM = 0x02,
|
2015-12-03 01:44:43 +08:00
|
|
|
/// \brief The element passed to the device is a pointer.
|
|
|
|
OMP_MAP_PTR = 0x20,
|
|
|
|
/// \brief Pass the element to the device by value.
|
|
|
|
OMP_MAP_BYCOPY = 0x80,
|
2015-10-03 00:14:20 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
enum OpenMPOffloadingReservedDeviceIDs {
|
|
|
|
/// \brief Device ID if the device was not defined, runtime should get it
|
|
|
|
/// from environment variables in the spec.
|
|
|
|
OMP_DEVICEID_UNDEF = -1,
|
|
|
|
};
|
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
assert(OutlinedFn && "Invalid outlined function!");
|
|
|
|
|
2015-12-03 01:44:43 +08:00
|
|
|
auto &Ctx = CGF.getContext();
|
|
|
|
|
2015-10-03 00:14:20 +08:00
|
|
|
// Fill up the arrays with the all the captured variables.
|
|
|
|
SmallVector<llvm::Value *, 16> BasePointers;
|
|
|
|
SmallVector<llvm::Value *, 16> Pointers;
|
|
|
|
SmallVector<llvm::Value *, 16> Sizes;
|
|
|
|
SmallVector<unsigned, 16> MapTypes;
|
|
|
|
|
|
|
|
bool hasVLACaptures = false;
|
|
|
|
|
|
|
|
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
|
|
|
|
auto RI = CS.getCapturedRecordDecl()->field_begin();
|
|
|
|
// auto II = CS.capture_init_begin();
|
|
|
|
auto CV = CapturedVars.begin();
|
|
|
|
for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
|
|
|
|
CE = CS.capture_end();
|
|
|
|
CI != CE; ++CI, ++RI, ++CV) {
|
|
|
|
StringRef Name;
|
|
|
|
QualType Ty;
|
|
|
|
llvm::Value *BasePointer;
|
|
|
|
llvm::Value *Pointer;
|
|
|
|
llvm::Value *Size;
|
|
|
|
unsigned MapType;
|
|
|
|
|
2015-12-03 01:44:43 +08:00
|
|
|
// VLA sizes are passed to the outlined region by copy.
|
2015-10-03 00:14:20 +08:00
|
|
|
if (CI->capturesVariableArrayType()) {
|
|
|
|
BasePointer = Pointer = *CV;
|
2016-01-26 20:20:39 +08:00
|
|
|
Size = CGF.getTypeSize(RI->getType());
|
2015-12-03 01:44:43 +08:00
|
|
|
// Copy to the device as an argument. No need to retrieve it.
|
|
|
|
MapType = OMP_MAP_BYCOPY;
|
2015-10-03 00:14:20 +08:00
|
|
|
hasVLACaptures = true;
|
|
|
|
} else if (CI->capturesThis()) {
|
|
|
|
BasePointer = Pointer = *CV;
|
|
|
|
const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
|
2016-01-26 20:20:39 +08:00
|
|
|
Size = CGF.getTypeSize(PtrTy->getPointeeType());
|
2015-10-03 00:14:20 +08:00
|
|
|
// Default map type.
|
|
|
|
MapType = OMP_MAP_TO | OMP_MAP_FROM;
|
2015-12-03 01:44:43 +08:00
|
|
|
} else if (CI->capturesVariableByCopy()) {
|
|
|
|
MapType = OMP_MAP_BYCOPY;
|
|
|
|
if (!RI->getType()->isAnyPointerType()) {
|
|
|
|
// If the field is not a pointer, we need to save the actual value and
|
|
|
|
// load it as a void pointer.
|
|
|
|
auto DstAddr = CGF.CreateMemTemp(
|
|
|
|
Ctx.getUIntPtrType(),
|
|
|
|
Twine(CI->getCapturedVar()->getName()) + ".casted");
|
|
|
|
LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
|
|
|
|
|
|
|
|
auto *SrcAddrVal = CGF.EmitScalarConversion(
|
|
|
|
DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
|
|
|
|
Ctx.getPointerType(RI->getType()), SourceLocation());
|
|
|
|
LValue SrcLV =
|
|
|
|
CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType());
|
|
|
|
|
|
|
|
// Store the value using the source type pointer.
|
|
|
|
CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV);
|
|
|
|
|
|
|
|
// Load the value using the destination type pointer.
|
|
|
|
BasePointer = Pointer =
|
|
|
|
CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
|
|
|
|
} else {
|
|
|
|
MapType |= OMP_MAP_PTR;
|
|
|
|
BasePointer = Pointer = *CV;
|
|
|
|
}
|
2016-01-26 20:20:39 +08:00
|
|
|
Size = CGF.getTypeSize(RI->getType());
|
2015-10-03 00:14:20 +08:00
|
|
|
} else {
|
2015-12-03 01:44:43 +08:00
|
|
|
assert(CI->capturesVariable() && "Expected captured reference.");
|
2015-10-03 00:14:20 +08:00
|
|
|
BasePointer = Pointer = *CV;
|
|
|
|
|
|
|
|
const ReferenceType *PtrTy =
|
|
|
|
cast<ReferenceType>(RI->getType().getTypePtr());
|
|
|
|
QualType ElementType = PtrTy->getPointeeType();
|
2016-01-26 20:20:39 +08:00
|
|
|
Size = CGF.getTypeSize(ElementType);
|
2015-12-03 01:44:43 +08:00
|
|
|
// The default map type for a scalar/complex type is 'to' because by
|
|
|
|
// default the value doesn't have to be retrieved. For an aggregate type,
|
|
|
|
// the default is 'tofrom'.
|
|
|
|
MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
|
|
|
|
: OMP_MAP_TO;
|
|
|
|
if (ElementType->isAnyPointerType())
|
|
|
|
MapType |= OMP_MAP_PTR;
|
2015-10-03 00:14:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
BasePointers.push_back(BasePointer);
|
|
|
|
Pointers.push_back(Pointer);
|
|
|
|
Sizes.push_back(Size);
|
|
|
|
MapTypes.push_back(MapType);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Keep track on whether the host function has to be executed.
|
|
|
|
auto OffloadErrorQType =
|
2015-12-03 01:44:43 +08:00
|
|
|
Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
|
2015-10-03 00:14:20 +08:00
|
|
|
auto OffloadError = CGF.MakeAddrLValue(
|
|
|
|
CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
|
|
|
|
OffloadErrorQType);
|
|
|
|
CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
|
|
|
|
OffloadError);
|
|
|
|
|
|
|
|
// Fill up the pointer arrays and transfer execution to the device.
|
2016-03-28 20:58:34 +08:00
|
|
|
auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
|
|
|
|
hasVLACaptures, Device, OutlinedFnID, OffloadError,
|
|
|
|
OffloadErrorQType, &D](CodeGenFunction &CGF) {
|
2015-10-03 00:14:20 +08:00
|
|
|
unsigned PointerNumVal = BasePointers.size();
|
|
|
|
llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
|
|
|
|
llvm::Value *BasePointersArray;
|
|
|
|
llvm::Value *PointersArray;
|
|
|
|
llvm::Value *SizesArray;
|
|
|
|
llvm::Value *MapTypesArray;
|
|
|
|
|
|
|
|
if (PointerNumVal) {
|
|
|
|
llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
|
2015-12-03 01:44:43 +08:00
|
|
|
QualType PointerArrayType = Ctx.getConstantArrayType(
|
|
|
|
Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
|
2015-10-03 00:14:20 +08:00
|
|
|
/*IndexTypeQuals=*/0);
|
|
|
|
|
|
|
|
BasePointersArray =
|
|
|
|
CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
|
|
|
|
PointersArray =
|
|
|
|
CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
|
|
|
|
|
|
|
|
// If we don't have any VLA types, we can use a constant array for the map
|
|
|
|
// sizes, otherwise we need to fill up the arrays as we do for the
|
|
|
|
// pointers.
|
|
|
|
if (hasVLACaptures) {
|
2015-12-03 01:44:43 +08:00
|
|
|
QualType SizeArrayType = Ctx.getConstantArrayType(
|
|
|
|
Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
|
2015-10-03 00:14:20 +08:00
|
|
|
/*IndexTypeQuals=*/0);
|
|
|
|
SizesArray =
|
|
|
|
CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
|
|
|
|
} else {
|
|
|
|
// We expect all the sizes to be constant, so we collect them to create
|
|
|
|
// a constant array.
|
|
|
|
SmallVector<llvm::Constant *, 16> ConstSizes;
|
|
|
|
for (auto S : Sizes)
|
|
|
|
ConstSizes.push_back(cast<llvm::Constant>(S));
|
|
|
|
|
|
|
|
auto *SizesArrayInit = llvm::ConstantArray::get(
|
2016-03-28 20:58:34 +08:00
|
|
|
llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
|
2015-10-03 00:14:20 +08:00
|
|
|
auto *SizesArrayGbl = new llvm::GlobalVariable(
|
2016-03-28 20:58:34 +08:00
|
|
|
CGM.getModule(), SizesArrayInit->getType(),
|
2015-10-03 00:14:20 +08:00
|
|
|
/*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
|
|
|
|
SizesArrayInit, ".offload_sizes");
|
|
|
|
SizesArrayGbl->setUnnamedAddr(true);
|
|
|
|
SizesArray = SizesArrayGbl;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The map types are always constant so we don't need to generate code to
|
|
|
|
// fill arrays. Instead, we create an array constant.
|
|
|
|
llvm::Constant *MapTypesArrayInit =
|
|
|
|
llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
|
|
|
|
auto *MapTypesArrayGbl = new llvm::GlobalVariable(
|
2016-03-28 20:58:34 +08:00
|
|
|
CGM.getModule(), MapTypesArrayInit->getType(),
|
2015-10-03 00:14:20 +08:00
|
|
|
/*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
|
|
|
|
MapTypesArrayInit, ".offload_maptypes");
|
|
|
|
MapTypesArrayGbl->setUnnamedAddr(true);
|
|
|
|
MapTypesArray = MapTypesArrayGbl;
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < PointerNumVal; ++i) {
|
2015-12-03 01:44:43 +08:00
|
|
|
llvm::Value *BPVal = BasePointers[i];
|
|
|
|
if (BPVal->getType()->isPointerTy())
|
2016-03-28 20:58:34 +08:00
|
|
|
BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
|
2015-12-03 01:44:43 +08:00
|
|
|
else {
|
|
|
|
assert(BPVal->getType()->isIntegerTy() &&
|
|
|
|
"If not a pointer, the value type must be an integer.");
|
2016-03-28 20:58:34 +08:00
|
|
|
BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
|
2015-12-03 01:44:43 +08:00
|
|
|
}
|
2015-10-03 00:14:20 +08:00
|
|
|
llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-03-28 20:58:34 +08:00
|
|
|
llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
|
2015-10-03 00:14:20 +08:00
|
|
|
BasePointersArray, 0, i);
|
2015-12-03 01:44:43 +08:00
|
|
|
Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
|
|
|
|
CGF.Builder.CreateStore(BPVal, BPAddr);
|
|
|
|
|
|
|
|
llvm::Value *PVal = Pointers[i];
|
|
|
|
if (PVal->getType()->isPointerTy())
|
2016-03-28 20:58:34 +08:00
|
|
|
PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
|
2015-12-03 01:44:43 +08:00
|
|
|
else {
|
|
|
|
assert(PVal->getType()->isIntegerTy() &&
|
|
|
|
"If not a pointer, the value type must be an integer.");
|
2016-03-28 20:58:34 +08:00
|
|
|
PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
|
2015-12-03 01:44:43 +08:00
|
|
|
}
|
2015-10-03 00:14:20 +08:00
|
|
|
llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-03-28 20:58:34 +08:00
|
|
|
llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
|
2015-10-03 00:14:20 +08:00
|
|
|
0, i);
|
2015-12-03 01:44:43 +08:00
|
|
|
Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
|
|
|
|
CGF.Builder.CreateStore(PVal, PAddr);
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
if (hasVLACaptures) {
|
|
|
|
llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-03-28 20:58:34 +08:00
|
|
|
llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
|
2015-10-03 00:14:20 +08:00
|
|
|
/*Idx0=*/0,
|
|
|
|
/*Idx1=*/i);
|
2015-12-03 01:44:43 +08:00
|
|
|
Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
|
2015-10-03 00:14:20 +08:00
|
|
|
CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
|
2016-03-28 20:58:34 +08:00
|
|
|
Sizes[i], CGM.SizeTy, /*isSigned=*/true),
|
2015-10-03 00:14:20 +08:00
|
|
|
SAddr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-03-28 20:58:34 +08:00
|
|
|
llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
|
2015-10-03 00:14:20 +08:00
|
|
|
/*Idx0=*/0, /*Idx1=*/0);
|
|
|
|
PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-03-28 20:58:34 +08:00
|
|
|
llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
|
2015-10-03 00:14:20 +08:00
|
|
|
/*Idx0=*/0,
|
|
|
|
/*Idx1=*/0);
|
|
|
|
SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-03-28 20:58:34 +08:00
|
|
|
llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
|
2015-10-03 00:14:20 +08:00
|
|
|
/*Idx0=*/0, /*Idx1=*/0);
|
|
|
|
MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-03-28 20:58:34 +08:00
|
|
|
llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray,
|
2015-10-03 00:14:20 +08:00
|
|
|
/*Idx0=*/0,
|
|
|
|
/*Idx1=*/0);
|
|
|
|
|
|
|
|
} else {
|
2016-03-28 20:58:34 +08:00
|
|
|
BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
|
|
|
|
PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
|
|
|
|
SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
|
2015-10-03 00:14:20 +08:00
|
|
|
MapTypesArray =
|
2016-03-28 20:58:34 +08:00
|
|
|
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
|
2015-10-03 00:14:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// On top of the arrays that were filled up, the target offloading call
|
|
|
|
// takes as arguments the device id as well as the host pointer. The host
|
|
|
|
// pointer is used by the runtime library to identify the current target
|
|
|
|
// region, so it only has to be unique and not necessarily point to
|
|
|
|
// anything. It could be the pointer to the outlined function that
|
|
|
|
// implements the target region, but we aren't using that so that the
|
|
|
|
// compiler doesn't need to keep that, and could therefore inline the host
|
|
|
|
// function if proven worthwhile during optimization.
|
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
// From this point on, we need to have an ID of the target region defined.
|
|
|
|
assert(OutlinedFnID && "Invalid outlined function ID!");
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
// Emit device ID if any.
|
|
|
|
llvm::Value *DeviceID;
|
|
|
|
if (Device)
|
|
|
|
DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
|
2016-03-28 20:58:34 +08:00
|
|
|
CGM.Int32Ty, /*isSigned=*/true);
|
2015-10-03 00:14:20 +08:00
|
|
|
else
|
|
|
|
DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
|
|
|
|
|
2016-03-04 00:20:23 +08:00
|
|
|
// Return value of the runtime offloading call.
|
|
|
|
llvm::Value *Return;
|
|
|
|
|
2016-03-28 20:58:34 +08:00
|
|
|
auto *NumTeams = emitNumTeamsClauseForTargetDirective(*this, CGF, D);
|
|
|
|
auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(*this, CGF, D);
|
2016-03-04 00:20:23 +08:00
|
|
|
|
|
|
|
// If we have NumTeams defined this means that we have an enclosed teams
|
|
|
|
// region. Therefore we also expect to have ThreadLimit defined. These two
|
|
|
|
// values should be defined in the presence of a teams directive, regardless
|
|
|
|
// of having any clauses associated. If the user is using teams but no
|
|
|
|
// clauses, these two values will be the default that should be passed to
|
|
|
|
// the runtime library - a 32-bit integer with the value zero.
|
|
|
|
if (NumTeams) {
|
|
|
|
assert(ThreadLimit && "Thread limit expression should be available along "
|
|
|
|
"with number of teams.");
|
|
|
|
llvm::Value *OffloadingArgs[] = {
|
|
|
|
DeviceID, OutlinedFnID, PointerNum,
|
|
|
|
BasePointersArray, PointersArray, SizesArray,
|
|
|
|
MapTypesArray, NumTeams, ThreadLimit};
|
|
|
|
Return = CGF.EmitRuntimeCall(
|
2016-03-28 20:58:34 +08:00
|
|
|
createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
|
2016-03-04 00:20:23 +08:00
|
|
|
} else {
|
|
|
|
llvm::Value *OffloadingArgs[] = {
|
|
|
|
DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
|
|
|
|
PointersArray, SizesArray, MapTypesArray};
|
2016-03-28 20:58:34 +08:00
|
|
|
Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
|
2016-03-04 00:20:23 +08:00
|
|
|
OffloadingArgs);
|
|
|
|
}
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
CGF.EmitStoreOfScalar(Return, OffloadError);
|
|
|
|
};
|
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
// Notify that the host version must be executed.
|
2016-03-28 20:58:34 +08:00
|
|
|
auto &&ElseGen = [this, OffloadError,
|
|
|
|
OffloadErrorQType](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
|
2016-01-06 21:42:12 +08:00
|
|
|
OffloadError);
|
|
|
|
};
|
|
|
|
|
|
|
|
// If we have a target function ID it means that we need to support
|
|
|
|
// offloading, otherwise, just execute on the host. We need to execute on host
|
|
|
|
// regardless of the conditional in the if clause if, e.g., the user do not
|
|
|
|
// specify target triples.
|
|
|
|
if (OutlinedFnID) {
|
2016-03-28 20:58:34 +08:00
|
|
|
if (IfCond) {
|
2016-01-06 21:42:12 +08:00
|
|
|
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
|
2016-03-28 20:58:34 +08:00
|
|
|
} else {
|
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
2016-01-06 21:42:12 +08:00
|
|
|
ThenGen(CGF);
|
2016-03-28 20:58:34 +08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
2016-01-06 21:42:12 +08:00
|
|
|
ElseGen(CGF);
|
2016-03-28 20:58:34 +08:00
|
|
|
}
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
// Check the error code and execute the host version if required.
|
|
|
|
auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
|
|
|
|
auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
|
|
|
|
auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
|
|
|
|
auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
|
|
|
|
CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
|
|
|
|
|
|
|
|
CGF.EmitBlock(OffloadFailedBlock);
|
|
|
|
CGF.Builder.CreateCall(OutlinedFn, BasePointers);
|
|
|
|
CGF.EmitBranch(OffloadContBlock);
|
|
|
|
|
|
|
|
CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
|
|
|
|
}
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
|
|
|
|
StringRef ParentName) {
|
|
|
|
if (!S)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// If we find a OMP target directive, codegen the outline function and
|
|
|
|
// register the result.
|
|
|
|
// FIXME: Add other directives with target when they become supported.
|
|
|
|
bool isTargetDirective = isa<OMPTargetDirective>(S);
|
|
|
|
|
|
|
|
if (isTargetDirective) {
|
|
|
|
auto *E = cast<OMPExecutableDirective>(S);
|
|
|
|
unsigned DeviceID;
|
|
|
|
unsigned FileID;
|
|
|
|
unsigned Line;
|
|
|
|
getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
FileID, Line);
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
// Is this a target region that should not be emitted as an entry point? If
|
|
|
|
// so just signal we are done with this target region.
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
|
|
|
|
ParentName, Line))
|
2016-01-06 21:42:12 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
llvm::Function *Fn;
|
|
|
|
llvm::Constant *Addr;
|
2016-03-28 20:58:34 +08:00
|
|
|
emitTargetOutlinedFunction(*E, ParentName, Fn, Addr,
|
|
|
|
/*isOffloadEntry=*/true);
|
2016-01-06 21:42:12 +08:00
|
|
|
assert(Fn && Addr && "Target region emission failed.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
|
|
|
|
if (!E->getAssociatedStmt())
|
|
|
|
return;
|
|
|
|
|
|
|
|
scanForTargetRegionsFunctions(
|
|
|
|
cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
|
|
|
|
ParentName);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If this is a lambda function, look into its body.
|
|
|
|
if (auto *L = dyn_cast<LambdaExpr>(S))
|
|
|
|
S = L->getBody();
|
|
|
|
|
|
|
|
// Keep looking for target regions recursively.
|
|
|
|
for (auto *II : S->children())
|
|
|
|
scanForTargetRegionsFunctions(II, ParentName);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Handle a function declaration when compiling for an OpenMP device.
///
/// \returns false when compiling for the host, so the caller performs the
/// normal code generation; true when compiling for the device, after the
/// function body has been scanned for target regions.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  const auto *FnDecl = cast<FunctionDecl>(GD.getDecl());

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device compilation: only the target regions found inside the body are
    // emitted; the function itself is not.
    scanForTargetRegionsFunctions(FnDecl->getBody(), CGM.getMangledName(GD));

    // No function other than the ones created during the scan should be
    // emitted, so report this declaration as completely dealt with.
    return true;
  }

  // Host compilation: nothing is processed here, regular codegen takes over.
  return false;
}
|
|
|
|
|
|
|
|
/// Handle a global variable declaration when compiling for an OpenMP device.
///
/// \returns false when compiling for the host; true when compiling for the
/// device, after the constructors and destructor of the variable's record
/// type (if any) have been scanned for target regions.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Target regions may be nested in the constructors or the destructor of the
  // variable's (base element) class type. The complete-object variants are
  // used to produce the kernel name mangling.
  const auto *Var = cast<VarDecl>(GD.getDecl());
  const auto *ElementTy = Var->getType()->getBaseElementTypeUnsafe();
  if (const auto *Record = ElementTy->getAsCXXRecordDecl()) {
    for (const auto *Ctor : Record->ctors()) {
      StringRef CtorName = CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), CtorName);
    }

    if (const auto *Dtor = Record->getDestructor()) {
      StringRef DtorName = CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), DtorName);
    }
  }

  // In target mode no global is emitted (declare target is not implemented
  // yet), so report GD as already processed.
  return true;
}
|
|
|
|
|
|
|
|
/// Dispatch a global declaration to the function or the global-variable
/// handler, depending on the kind of declaration \p GD refers to.
///
/// \returns whatever the selected handler returns.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  const bool IsFunction = isa<FunctionDecl>(GD.getDecl());
  return IsFunction ? emitTargetFunctions(GD) : emitTargetGlobalVariable(GD);
}
|
|
|
|
|
|
|
|
/// Emit the offload entries of the current module and then create the
/// registration of the offloading binary descriptor.
///
/// \returns the result of createOffloadingBinaryDescriptorRegistration().
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // The entries (and their metadata) have to be emitted now, before the
  // descriptor is registered.
  createOffloadEntriesAndInfoMetadata();

  // The binary descriptor is the main entity that captures all the offloading
  // information of this compilation unit.
  llvm::Function *RegistrationFn =
      createOffloadingBinaryDescriptorRegistration();
  return RegistrationFn;
}
|
2016-03-04 04:34:23 +08:00
|
|
|
|
|
|
|
/// Emit a call to the runtime's fork-teams entry point for an outlined
/// function.
///
/// \param CGF Function being generated; nothing is emitted when it has no
/// insertion point.
/// \param D Directive the call is emitted for (not read by this body).
/// \param Loc Source location used to build the runtime location argument.
/// \param OutlinedFn Outlined function, passed to the runtime after a bitcast
/// to the microtask pointer type.
/// \param CapturedVars Values appended after the three fixed arguments.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Value *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  auto *LocArg = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Argument list of __kmpc_fork_teams(loc, n, microtask, var1, .., varn).
  llvm::SmallVector<llvm::Value *, 16> ForkArgs;
  ForkArgs.push_back(LocArg);
  // n: number of captured variables that follow the microtask.
  ForkArgs.push_back(CGF.Builder.getInt32(CapturedVars.size()));
  ForkArgs.push_back(
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()));
  ForkArgs.append(CapturedVars.begin(), CapturedVars.end());

  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_fork_teams),
                      ForkArgs);
}
|
|
|
|
|
|
|
|
/// Emit a call to the runtime's push-num-teams entry point.
///
/// \param CGF Function being generated; nothing is emitted when it has no
/// insertion point.
/// \param NumTeams Value passed as the num_teams argument.
/// \param ThreadLimit Value passed as the thread_limit argument.
/// \param Loc Source location used for the location and thread-id arguments.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         llvm::Value *NumTeams,
                                         llvm::Value *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *LocArg = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadIDArg = getThreadID(CGF, Loc);

  // __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *CallArgs[] = {LocArg, ThreadIDArg, NumTeams, ThreadLimit};
  auto PushNumTeamsFn = createRuntimeFunction(OMPRTL__kmpc_push_num_teams);
  CGF.EmitRuntimeCall(PushNumTeamsFn, CallArgs);
}
|