2014-05-06 18:08:46 +08:00
|
|
|
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This provides a class for OpenMP runtime code generation.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
#include "CGCXXABI.h"
|
|
|
|
#include "CGCleanup.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "CGOpenMPRuntime.h"
|
|
|
|
#include "CodeGenFunction.h"
|
2017-03-03 04:04:19 +08:00
|
|
|
#include "clang/CodeGen/ConstantInitBuilder.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "clang/AST/Decl.h"
|
2015-01-14 19:29:14 +08:00
|
|
|
#include "clang/AST/StmtOpenMP.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
2016-11-11 13:35:12 +08:00
|
|
|
#include "llvm/Bitcode/BitcodeReader.h"
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
#include "llvm/IR/CallSite.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
|
|
#include "llvm/IR/GlobalValue.h"
|
|
|
|
#include "llvm/IR/Value.h"
|
2016-01-06 21:42:12 +08:00
|
|
|
#include "llvm/Support/Format.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2014-06-18 15:08:49 +08:00
|
|
|
#include <cassert>
|
2014-05-06 18:08:46 +08:00
|
|
|
|
|
|
|
using namespace clang;
|
|
|
|
using namespace CodeGen;
|
|
|
|
|
2014-10-10 21:57:57 +08:00
|
|
|
namespace {
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief Base class for handling code generation inside OpenMP regions.
|
2014-10-10 20:19:54 +08:00
|
|
|
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
|
|
|
|
public:
|
2015-04-10 12:50:10 +08:00
|
|
|
/// \brief Kinds of OpenMP regions used in codegen.
|
|
|
|
enum CGOpenMPRegionKind {
|
|
|
|
/// \brief Region with outlined function for standalone 'parallel'
|
|
|
|
/// directive.
|
|
|
|
ParallelOutlinedRegion,
|
|
|
|
/// \brief Region with outlined function for standalone 'task' directive.
|
|
|
|
TaskOutlinedRegion,
|
|
|
|
/// \brief Region for constructs that do not require function outlining,
|
|
|
|
/// like 'for', 'sections', 'atomic' etc. directives.
|
|
|
|
InlinedRegion,
|
2015-10-03 00:14:20 +08:00
|
|
|
/// \brief Region with outlined function for standalone 'target' directive.
|
|
|
|
TargetRegion,
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
CGOpenMPRegionInfo(const CapturedStmt &CS,
|
|
|
|
const CGOpenMPRegionKind RegionKind,
|
2015-09-15 20:52:43 +08:00
|
|
|
const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
|
|
|
|
bool HasCancel)
|
2015-04-10 12:50:10 +08:00
|
|
|
: CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
|
2015-09-15 20:52:43 +08:00
|
|
|
CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
|
2014-10-10 20:19:54 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
|
2015-09-15 20:52:43 +08:00
|
|
|
const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
|
|
|
|
bool HasCancel)
|
2015-07-03 17:56:58 +08:00
|
|
|
: CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
|
2015-09-15 20:52:43 +08:00
|
|
|
Kind(Kind), HasCancel(HasCancel) {}
|
2015-02-26 18:27:34 +08:00
|
|
|
|
|
|
|
/// \brief Get a variable or parameter for storing global thread id
|
2014-10-10 20:19:54 +08:00
|
|
|
/// inside OpenMP construct.
|
2015-02-26 18:27:34 +08:00
|
|
|
virtual const VarDecl *getThreadIDVariable() const = 0;
|
2014-10-10 20:19:54 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
/// \brief Emit the captured statement body.
|
2015-09-11 01:07:54 +08:00
|
|
|
void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
|
2015-04-10 12:50:10 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief Get an LValue for the current ThreadID variable.
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
/// \return LValue for thread id variable. This LValue always has type int32*.
|
|
|
|
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
|
2014-10-10 20:19:54 +08:00
|
|
|
|
2016-04-20 12:01:36 +08:00
|
|
|
virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
|
2015-02-26 18:27:34 +08:00
|
|
|
|
2015-07-03 17:56:58 +08:00
|
|
|
OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
|
|
|
|
|
2015-09-15 20:52:43 +08:00
|
|
|
bool hasCancel() const { return HasCancel; }
|
|
|
|
|
2014-10-10 20:19:54 +08:00
|
|
|
static bool classof(const CGCapturedStmtInfo *Info) {
|
|
|
|
return Info->getKind() == CR_OpenMP;
|
|
|
|
}
|
2015-04-10 12:50:10 +08:00
|
|
|
|
2016-04-20 12:01:36 +08:00
|
|
|
~CGOpenMPRegionInfo() override = default;
|
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
protected:
|
2015-04-10 12:50:10 +08:00
|
|
|
CGOpenMPRegionKind RegionKind;
|
2016-01-13 04:54:36 +08:00
|
|
|
RegionCodeGenTy CodeGen;
|
2015-07-03 17:56:58 +08:00
|
|
|
OpenMPDirectiveKind Kind;
|
2015-09-15 20:52:43 +08:00
|
|
|
bool HasCancel;
|
2015-02-26 18:27:34 +08:00
|
|
|
};
|
2014-10-10 20:19:54 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief API for captured statement code generation in OpenMP constructs.
|
2016-04-20 12:01:36 +08:00
|
|
|
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
|
2015-02-26 18:27:34 +08:00
|
|
|
public:
|
2015-04-10 12:50:10 +08:00
|
|
|
CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
|
2015-07-03 17:56:58 +08:00
|
|
|
const RegionCodeGenTy &CodeGen,
|
2017-01-10 23:42:51 +08:00
|
|
|
OpenMPDirectiveKind Kind, bool HasCancel,
|
|
|
|
StringRef HelperName)
|
2015-09-15 20:52:43 +08:00
|
|
|
: CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
|
|
|
|
HasCancel),
|
2017-01-10 23:42:51 +08:00
|
|
|
ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
|
2015-02-26 18:27:34 +08:00
|
|
|
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
|
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief Get a variable or parameter for storing global thread id
|
|
|
|
/// inside OpenMP construct.
|
2015-04-11 10:00:23 +08:00
|
|
|
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
|
2015-04-10 12:50:10 +08:00
|
|
|
|
2014-10-10 20:19:54 +08:00
|
|
|
/// \brief Get the name of the capture helper.
|
2017-01-10 23:42:51 +08:00
|
|
|
StringRef getHelperName() const override { return HelperName; }
|
2014-10-10 20:19:54 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
static bool classof(const CGCapturedStmtInfo *Info) {
|
|
|
|
return CGOpenMPRegionInfo::classof(Info) &&
|
|
|
|
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
|
|
|
|
ParallelOutlinedRegion;
|
|
|
|
}
|
|
|
|
|
2014-10-10 20:19:54 +08:00
|
|
|
private:
|
|
|
|
/// \brief A variable or parameter storing global thread id for OpenMP
|
|
|
|
/// constructs.
|
|
|
|
const VarDecl *ThreadIDVar;
|
2017-01-10 23:42:51 +08:00
|
|
|
StringRef HelperName;
|
2015-02-26 18:27:34 +08:00
|
|
|
};
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
/// \brief API for captured statement code generation in OpenMP constructs.
|
2016-04-20 12:01:36 +08:00
|
|
|
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
public:
|
2016-04-20 12:01:36 +08:00
|
|
|
class UntiedTaskActionTy final : public PrePostActionTy {
|
|
|
|
bool Untied;
|
|
|
|
const VarDecl *PartIDVar;
|
|
|
|
const RegionCodeGenTy UntiedCodeGen;
|
|
|
|
llvm::SwitchInst *UntiedSwitch = nullptr;
|
|
|
|
|
|
|
|
public:
|
|
|
|
UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
|
|
|
|
const RegionCodeGenTy &UntiedCodeGen)
|
|
|
|
: Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
|
|
|
|
void Enter(CodeGenFunction &CGF) override {
|
|
|
|
if (Untied) {
|
|
|
|
// Emit task switching point.
|
|
|
|
auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
|
|
|
|
CGF.GetAddrOfLocalVar(PartIDVar),
|
|
|
|
PartIDVar->getType()->castAs<PointerType>());
|
|
|
|
auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
|
|
|
|
auto *DoneBB = CGF.createBasicBlock(".untied.done.");
|
|
|
|
UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
|
|
|
|
CGF.EmitBlock(DoneBB);
|
|
|
|
CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
|
|
|
|
CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
|
|
|
|
UntiedSwitch->addCase(CGF.Builder.getInt32(0),
|
|
|
|
CGF.Builder.GetInsertBlock());
|
|
|
|
emitUntiedSwitch(CGF);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
void emitUntiedSwitch(CodeGenFunction &CGF) const {
|
|
|
|
if (Untied) {
|
|
|
|
auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
|
|
|
|
CGF.GetAddrOfLocalVar(PartIDVar),
|
|
|
|
PartIDVar->getType()->castAs<PointerType>());
|
|
|
|
CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
|
|
|
|
PartIdLVal);
|
|
|
|
UntiedCodeGen(CGF);
|
|
|
|
CodeGenFunction::JumpDest CurPoint =
|
|
|
|
CGF.getJumpDestInCurrentScope(".untied.next.");
|
|
|
|
CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
|
|
|
|
CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
|
|
|
|
UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
|
|
|
|
CGF.Builder.GetInsertBlock());
|
|
|
|
CGF.EmitBranchThroughCleanup(CurPoint);
|
|
|
|
CGF.EmitBlock(CurPoint.getBlock());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
|
|
|
|
};
|
2015-04-10 12:50:10 +08:00
|
|
|
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
const VarDecl *ThreadIDVar,
|
2015-07-03 17:56:58 +08:00
|
|
|
const RegionCodeGenTy &CodeGen,
|
2016-04-20 12:01:36 +08:00
|
|
|
OpenMPDirectiveKind Kind, bool HasCancel,
|
|
|
|
const UntiedTaskActionTy &Action)
|
2015-09-15 20:52:43 +08:00
|
|
|
: CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
|
2016-04-20 12:01:36 +08:00
|
|
|
ThreadIDVar(ThreadIDVar), Action(Action) {
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
|
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
/// \brief Get a variable or parameter for storing global thread id
|
|
|
|
/// inside OpenMP construct.
|
2015-04-11 10:00:23 +08:00
|
|
|
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
|
|
|
|
/// \brief Get an LValue for the current ThreadID variable.
|
2015-04-11 10:00:23 +08:00
|
|
|
LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
|
|
|
|
/// \brief Get the name of the capture helper.
|
|
|
|
StringRef getHelperName() const override { return ".omp_outlined."; }
|
|
|
|
|
2016-04-20 12:01:36 +08:00
|
|
|
void emitUntiedSwitch(CodeGenFunction &CGF) override {
|
|
|
|
Action.emitUntiedSwitch(CGF);
|
|
|
|
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
static bool classof(const CGCapturedStmtInfo *Info) {
|
|
|
|
return CGOpenMPRegionInfo::classof(Info) &&
|
|
|
|
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
|
|
|
|
TaskOutlinedRegion;
|
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
private:
|
|
|
|
/// \brief A variable or parameter storing global thread id for OpenMP
|
|
|
|
/// constructs.
|
|
|
|
const VarDecl *ThreadIDVar;
|
2016-04-20 12:01:36 +08:00
|
|
|
/// Action for emitting code for untied tasks.
|
|
|
|
const UntiedTaskActionTy &Action;
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
};
|
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief API for inlined captured statement code generation in OpenMP
|
|
|
|
/// constructs.
|
|
|
|
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
|
|
|
|
public:
|
2015-04-10 12:50:10 +08:00
|
|
|
CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
|
2015-07-03 17:56:58 +08:00
|
|
|
const RegionCodeGenTy &CodeGen,
|
2015-09-15 20:52:43 +08:00
|
|
|
OpenMPDirectiveKind Kind, bool HasCancel)
|
|
|
|
: CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
|
|
|
|
OldCSI(OldCSI),
|
2015-02-26 18:27:34 +08:00
|
|
|
OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
// \brief Retrieve the value of the context parameter.
|
2015-04-11 10:00:23 +08:00
|
|
|
llvm::Value *getContextValue() const override {
|
2015-02-26 18:27:34 +08:00
|
|
|
if (OuterRegionInfo)
|
|
|
|
return OuterRegionInfo->getContextValue();
|
|
|
|
llvm_unreachable("No context value for inlined OpenMP region");
|
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-09-11 01:07:54 +08:00
|
|
|
void setContextValue(llvm::Value *V) override {
|
2015-04-10 12:50:10 +08:00
|
|
|
if (OuterRegionInfo) {
|
|
|
|
OuterRegionInfo->setContextValue(V);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
llvm_unreachable("No context value for inlined OpenMP region");
|
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief Lookup the captured field decl for a variable.
|
2015-04-11 10:00:23 +08:00
|
|
|
const FieldDecl *lookup(const VarDecl *VD) const override {
|
2015-02-26 18:27:34 +08:00
|
|
|
if (OuterRegionInfo)
|
|
|
|
return OuterRegionInfo->lookup(VD);
|
2015-04-15 12:52:20 +08:00
|
|
|
// If there is no outer outlined region,no need to lookup in a list of
|
|
|
|
// captured variables, we can use the original one.
|
|
|
|
return nullptr;
|
2015-02-26 18:27:34 +08:00
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-04-11 10:00:23 +08:00
|
|
|
FieldDecl *getThisFieldDecl() const override {
|
2015-02-26 18:27:34 +08:00
|
|
|
if (OuterRegionInfo)
|
|
|
|
return OuterRegionInfo->getThisFieldDecl();
|
|
|
|
return nullptr;
|
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief Get a variable or parameter for storing global thread id
|
|
|
|
/// inside OpenMP construct.
|
2015-04-11 10:00:23 +08:00
|
|
|
const VarDecl *getThreadIDVariable() const override {
|
2015-02-26 18:27:34 +08:00
|
|
|
if (OuterRegionInfo)
|
|
|
|
return OuterRegionInfo->getThreadIDVariable();
|
|
|
|
return nullptr;
|
|
|
|
}
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
/// \brief Get the name of the capture helper.
|
2015-04-11 10:00:23 +08:00
|
|
|
StringRef getHelperName() const override {
|
2015-04-10 12:50:10 +08:00
|
|
|
if (auto *OuterRegionInfo = getOldCSI())
|
|
|
|
return OuterRegionInfo->getHelperName();
|
2015-02-26 18:27:34 +08:00
|
|
|
llvm_unreachable("No helper name for inlined OpenMP construct");
|
|
|
|
}
|
|
|
|
|
2016-04-20 12:01:36 +08:00
|
|
|
void emitUntiedSwitch(CodeGenFunction &CGF) override {
|
|
|
|
if (OuterRegionInfo)
|
|
|
|
OuterRegionInfo->emitUntiedSwitch(CGF);
|
|
|
|
}
|
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
static bool classof(const CGCapturedStmtInfo *Info) {
|
|
|
|
return CGOpenMPRegionInfo::classof(Info) &&
|
|
|
|
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
|
|
|
|
}
|
|
|
|
|
2016-04-20 12:01:36 +08:00
|
|
|
~CGOpenMPInlinedRegionInfo() override = default;
|
|
|
|
|
2015-02-26 18:27:34 +08:00
|
|
|
private:
|
|
|
|
/// \brief CodeGen info about outer OpenMP region.
|
|
|
|
CodeGenFunction::CGCapturedStmtInfo *OldCSI;
|
|
|
|
CGOpenMPRegionInfo *OuterRegionInfo;
|
2014-10-10 20:19:54 +08:00
|
|
|
};
|
2015-04-10 12:50:10 +08:00
|
|
|
|
2015-10-03 00:14:20 +08:00
|
|
|
/// \brief API for captured statement code generation in OpenMP target
|
|
|
|
/// constructs. For this captures, implicit parameters are used instead of the
|
2016-01-06 21:42:12 +08:00
|
|
|
/// captured fields. The name of the target region has to be unique in a given
|
|
|
|
/// application so it is provided by the client, because only the client has
|
|
|
|
/// the information to generate that.
|
2016-04-20 12:01:36 +08:00
|
|
|
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
|
2015-10-03 00:14:20 +08:00
|
|
|
public:
|
|
|
|
CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
|
2016-01-06 21:42:12 +08:00
|
|
|
const RegionCodeGenTy &CodeGen, StringRef HelperName)
|
2015-10-03 00:14:20 +08:00
|
|
|
: CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
|
2016-01-06 21:42:12 +08:00
|
|
|
/*HasCancel=*/false),
|
|
|
|
HelperName(HelperName) {}
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
/// \brief This is unused for target regions because each starts executing
|
|
|
|
/// with a single thread.
|
|
|
|
const VarDecl *getThreadIDVariable() const override { return nullptr; }
|
|
|
|
|
|
|
|
/// \brief Get the name of the capture helper.
|
2016-01-06 21:42:12 +08:00
|
|
|
StringRef getHelperName() const override { return HelperName; }
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
static bool classof(const CGCapturedStmtInfo *Info) {
|
|
|
|
return CGOpenMPRegionInfo::classof(Info) &&
|
|
|
|
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
|
|
|
|
}
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
StringRef HelperName;
|
2015-10-03 00:14:20 +08:00
|
|
|
};
|
|
|
|
|
2016-03-29 13:34:15 +08:00
|
|
|
/// \brief Placeholder code generation callback for expression-only regions.
/// It must never actually be invoked.
static void EmptyCodeGen(CodeGenFunction & /*CGF*/,
                         PrePostActionTy & /*Action*/) {
  llvm_unreachable("No codegen for expressions");
}
|
|
|
|
/// \brief API for generation of expressions captured in a innermost OpenMP
|
|
|
|
/// region.
|
2016-04-20 12:01:36 +08:00
|
|
|
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
|
2016-03-04 00:20:23 +08:00
|
|
|
public:
|
|
|
|
CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
|
|
|
|
: CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
|
|
|
|
OMPD_unknown,
|
|
|
|
/*HasCancel=*/false),
|
|
|
|
PrivScope(CGF) {
|
|
|
|
// Make sure the globals captured in the provided statement are local by
|
|
|
|
// using the privatization logic. We assume the same variable is not
|
|
|
|
// captured more than once.
|
|
|
|
for (auto &C : CS.captures()) {
|
|
|
|
if (!C.capturesVariable() && !C.capturesVariableByCopy())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
const VarDecl *VD = C.getCapturedVar();
|
|
|
|
if (VD->isLocalVarDeclOrParm())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
DeclRefExpr DRE(const_cast<VarDecl *>(VD),
|
|
|
|
/*RefersToEnclosingVariableOrCapture=*/false,
|
|
|
|
VD->getType().getNonReferenceType(), VK_LValue,
|
|
|
|
SourceLocation());
|
|
|
|
PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
|
|
|
|
return CGF.EmitLValue(&DRE).getAddress();
|
|
|
|
});
|
|
|
|
}
|
|
|
|
(void)PrivScope.Privatize();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Lookup the captured field decl for a variable.
|
|
|
|
const FieldDecl *lookup(const VarDecl *VD) const override {
|
|
|
|
if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
|
|
|
|
return FD;
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Emit the captured statement body.
|
|
|
|
void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
|
|
|
|
llvm_unreachable("No body for expressions");
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Get a variable or parameter for storing global thread id
|
|
|
|
/// inside OpenMP construct.
|
|
|
|
const VarDecl *getThreadIDVariable() const override {
|
|
|
|
llvm_unreachable("No thread id for expressions");
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Get the name of the capture helper.
|
|
|
|
StringRef getHelperName() const override {
|
|
|
|
llvm_unreachable("No helper name for expressions");
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool classof(const CGCapturedStmtInfo *Info) { return false; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
/// Private scope to capture global variables.
|
|
|
|
CodeGenFunction::OMPPrivateScope PrivScope;
|
|
|
|
};
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
/// \brief RAII for emitting code of OpenMP constructs.
|
|
|
|
class InlinedOpenMPRegionRAII {
|
|
|
|
CodeGenFunction &CGF;
|
2016-04-27 15:56:03 +08:00
|
|
|
llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
|
|
|
|
FieldDecl *LambdaThisCaptureField = nullptr;
|
2015-04-10 12:50:10 +08:00
|
|
|
|
|
|
|
public:
|
|
|
|
/// \brief Constructs region for combined constructs.
|
|
|
|
/// \param CodeGen Code generation sequence for combined directives. Includes
|
|
|
|
/// a list of functions used for code generation of implicitly inlined
|
|
|
|
/// regions.
|
2015-07-03 17:56:58 +08:00
|
|
|
InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
|
2015-09-15 20:52:43 +08:00
|
|
|
OpenMPDirectiveKind Kind, bool HasCancel)
|
2015-04-10 12:50:10 +08:00
|
|
|
: CGF(CGF) {
|
|
|
|
// Start emission for the construct.
|
2015-09-15 20:52:43 +08:00
|
|
|
CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
|
|
|
|
CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
|
2016-04-27 15:56:03 +08:00
|
|
|
std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
|
|
|
|
LambdaThisCaptureField = CGF.LambdaThisCaptureField;
|
|
|
|
CGF.LambdaThisCaptureField = nullptr;
|
2015-04-10 12:50:10 +08:00
|
|
|
}
|
2016-02-11 03:11:58 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
~InlinedOpenMPRegionRAII() {
|
|
|
|
// Restore original CapturedStmtInfo only if we're done with code emission.
|
|
|
|
auto *OldCSI =
|
|
|
|
cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
|
|
|
|
delete CGF.CapturedStmtInfo;
|
|
|
|
CGF.CapturedStmtInfo = OldCSI;
|
2016-04-27 15:56:03 +08:00
|
|
|
std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
|
|
|
|
CGF.LambdaThisCaptureField = LambdaThisCaptureField;
|
2015-04-10 12:50:10 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2016-02-19 18:38:26 +08:00
|
|
|
/// \brief Bit flags stored in the flags field of ident_t.
/// All enumerators are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// \brief Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
};
|
|
|
|
|
|
|
|
/// \brief Indices of the fields of the ident structure, which describes a
/// source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief might be used in Fortran
  IdentField_Reserved_1,
  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// \brief Not really used in Fortran any more
  IdentField_Reserved_2,
  /// \brief Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// \brief String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
|
|
|
|
|
|
|
|
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,

  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// Schedule used by default: plain static.
  OMP_sch_default = OMP_sch_static,

  /// \brief dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
|
|
|
|
|
|
|
|
/// Identifiers of the runtime library entry points. The comment on each
/// enumerator gives the prototype of the corresponding runtime function.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  /// int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  /// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  /// new_task);
  OMPRTL__kmpc_omp_task,
  /// Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  /// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  /// kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  /// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  /// int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  /// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  /// microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  /// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  /// sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  /// num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_post,
  /// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_wait,
  /// Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  /// *data);
  OMPRTL__kmpc_task_reduction_init,
  /// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  /// *d);
  OMPRTL__kmpc_task_reduction_get_th_data,

  //
  // Offloading related calls
  //
  /// Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  /// *arg_types);
  OMPRTL__tgt_target,
  /// Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  /// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_end,
  /// Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_update,
};
|
|
|
|
|
2016-03-29 13:34:15 +08:00
|
|
|
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
|
|
|
|
/// region.
|
|
|
|
class CleanupTy final : public EHScopeStack::Cleanup {
|
|
|
|
PrePostActionTy *Action;
|
|
|
|
|
|
|
|
public:
|
|
|
|
explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
|
|
|
|
void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
|
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
|
|
|
Action->Exit(CGF);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2015-09-11 01:07:54 +08:00
|
|
|
} // anonymous namespace
|
2014-10-10 20:19:54 +08:00
|
|
|
|
2016-03-29 13:34:15 +08:00
|
|
|
/// Run the stored code generation callback inside its own cleanups scope.
/// When a pre|post-action is attached, a cleanup calling its exit hook is
/// pushed first; otherwise a default-constructed action is handed to the
/// callback.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (!PrePostAction) {
    PrePostActionTy DefaultAction;
    Callback(CodeGen, CGF, DefaultAction);
    return;
  }
  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
  Callback(CodeGen, CGF, *PrePostAction);
}
|
|
|
|
|
2017-07-13 21:36:14 +08:00
|
|
|
/// Check if the combiner is a call to UDR combiner and if it is so return the
|
|
|
|
/// UDR decl used for reduction.
|
|
|
|
static const OMPDeclareReductionDecl *
|
|
|
|
getReductionInit(const Expr *ReductionOp) {
|
|
|
|
if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
|
|
|
|
if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
|
|
|
|
if (auto *DRE =
|
|
|
|
dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
|
|
|
|
if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
|
|
|
|
return DRD;
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Initialize the private copy \p Private of a reduction item of type \p Ty.
///
/// If \p DRD has an initializer, \p InitOp is emitted as a call: the
/// variables referenced (through unary operators) by its first two arguments
/// are remapped to \p Private and \p Original, and its opaque callee is bound
/// to the second function of the pair returned by getUserDefinedReduction().
/// Otherwise \p Private is filled from a private constant global holding the
/// null constant of \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (!DRD->getInitializer()) {
    // No user initializer: materialize a null value of the type in a global
    // and copy it into the private storage.
    llvm::Constant *NullInit = CGF.CGM.EmitNullConstant(Ty);
    auto *InitGV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), NullInit->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, NullInit, ".init");
    LValue InitLV = CGF.MakeNaturalAlignAddrLValue(InitGV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(InitLV, SourceLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(InitLV, SourceLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(InitLV.getAddress());
      break;
    }
    // Route the value through an opaque expression so that the generic
    // "emit expression to memory" path can be reused for the store.
    OpaqueValueExpr InitOVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &InitOVE, InitRVal);
    CGF.EmitAnyExprToMem(&InitOVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
    return;
  }

  std::pair<llvm::Function *, llvm::Function *> UDRFuncs =
      CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
  auto *InitCall = cast<CallExpr>(InitOp);
  auto *CalleeOVE = cast<OpaqueValueExpr>(InitCall->getCallee());
  const Expr *PrivArg = InitCall->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
  const Expr *OrigArg = InitCall->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
  auto *PrivRef =
      cast<DeclRefExpr>(cast<UnaryOperator>(PrivArg)->getSubExpr());
  auto *OrigRef =
      cast<DeclRefExpr>(cast<UnaryOperator>(OrigArg)->getSubExpr());
  // Make the helper variables of the call resolve to the actual storage.
  CodeGenFunction::OMPPrivateScope RemapScope(CGF);
  RemapScope.addPrivate(cast<VarDecl>(PrivRef->getDecl()),
                        [Private]() -> Address { return Private; });
  RemapScope.addPrivate(cast<VarDecl>(OrigRef->getDecl()),
                        [Original]() -> Address { return Original; });
  (void)RemapScope.Privatize();
  RValue InitFn = RValue::get(UDRFuncs.second);
  CodeGenFunction::OpaqueValueMapping CalleeMap(CGF, CalleeOVE, InitFn);
  CGF.EmitIgnoredExpr(InitOp);
}
|
|
|
|
|
|
|
|
/// \brief Emit initialization of arrays of complex types.
|
|
|
|
/// \param DestAddr Address of the array.
|
|
|
|
/// \param Type Type of array.
|
|
|
|
/// \param Init Initial expression of array.
|
|
|
|
/// \param SrcAddr Address of the original array.
|
|
|
|
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
|
|
|
|
QualType Type, const Expr *Init,
|
2017-07-17 21:30:36 +08:00
|
|
|
const OMPDeclareReductionDecl *DRD,
|
2017-07-13 21:36:14 +08:00
|
|
|
Address SrcAddr = Address::invalid()) {
|
|
|
|
// Perform element-by-element initialization.
|
|
|
|
QualType ElementTy;
|
|
|
|
|
|
|
|
// Drill down to the base element type on both arrays.
|
|
|
|
auto ArrayTy = Type->getAsArrayTypeUnsafe();
|
|
|
|
auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
|
|
|
|
DestAddr =
|
|
|
|
CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
|
|
|
|
if (DRD)
|
|
|
|
SrcAddr =
|
|
|
|
CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
|
|
|
|
|
|
|
|
llvm::Value *SrcBegin = nullptr;
|
|
|
|
if (DRD)
|
|
|
|
SrcBegin = SrcAddr.getPointer();
|
|
|
|
auto DestBegin = DestAddr.getPointer();
|
|
|
|
// Cast from pointer to array type to pointer to single element.
|
|
|
|
auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
|
|
|
|
// The basic structure here is a while-do loop.
|
|
|
|
auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
|
|
|
|
auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
|
|
|
|
auto IsEmpty =
|
|
|
|
CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
|
|
|
|
CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
|
|
|
|
|
|
|
|
// Enter the loop body, making that address the current address.
|
|
|
|
auto EntryBB = CGF.Builder.GetInsertBlock();
|
|
|
|
CGF.EmitBlock(BodyBB);
|
|
|
|
|
|
|
|
CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
|
|
|
|
|
|
|
|
llvm::PHINode *SrcElementPHI = nullptr;
|
|
|
|
Address SrcElementCurrent = Address::invalid();
|
|
|
|
if (DRD) {
|
|
|
|
SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
|
|
|
|
"omp.arraycpy.srcElementPast");
|
|
|
|
SrcElementPHI->addIncoming(SrcBegin, EntryBB);
|
|
|
|
SrcElementCurrent =
|
|
|
|
Address(SrcElementPHI,
|
|
|
|
SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
|
|
|
|
}
|
|
|
|
llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
|
|
|
|
DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
|
|
|
|
DestElementPHI->addIncoming(DestBegin, EntryBB);
|
|
|
|
Address DestElementCurrent =
|
|
|
|
Address(DestElementPHI,
|
|
|
|
DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
|
|
|
|
|
|
|
|
// Emit copy.
|
|
|
|
{
|
|
|
|
CodeGenFunction::RunCleanupsScope InitScope(CGF);
|
|
|
|
if (DRD && (DRD->getInitializer() || !Init)) {
|
|
|
|
emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
|
|
|
|
SrcElementCurrent, ElementTy);
|
|
|
|
} else
|
|
|
|
CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
|
|
|
|
/*IsInitializer=*/false);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DRD) {
|
|
|
|
// Shift the address forward by one element.
|
|
|
|
auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
|
|
|
|
SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
|
|
|
|
SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shift the address forward by one element.
|
|
|
|
auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
|
|
|
|
DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
|
|
|
|
// Check whether we've reached the end.
|
|
|
|
auto Done =
|
|
|
|
CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
|
|
|
|
CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
|
|
|
|
DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
|
|
|
|
|
|
|
|
// Done.
|
|
|
|
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emit the lvalue of the shared reduction item \p E, which is an array
/// section, an array subscript, or a plain reference to a variable.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  if (const auto *Section = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(Section);
  if (const auto *Subscript = dyn_cast<ArraySubscriptExpr>(E))
    return CGF.EmitLValue(Subscript);

  // Plain variable: build a temporary reference that records whether the
  // variable is found in the current captured region.
  auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
  const bool IsCaptured = CGF.CapturedStmtInfo &&
                          CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr;
  DeclRefExpr OrigRef(const_cast<VarDecl *>(OrigVD), IsCaptured, E->getType(),
                      VK_LValue, E->getExprLoc());
  // Store the address of the original variable associated with the LHS
  // implicit variable.
  return CGF.EmitLValue(&OrigRef);
}
|
|
|
|
|
|
|
|
/// Emit the upper-bound lvalue of the shared reduction item \p E. Only array
/// sections have one; for any other expression a default-constructed LValue
/// is returned.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  const auto *Section = dyn_cast<OMPArraySectionExpr>(E);
  if (!Section)
    return LValue();
  return CGF.EmitOMPArraySectionExpr(Section, /*IsLowerBound=*/false);
}
|
|
|
|
|
2017-07-17 21:30:36 +08:00
|
|
|
/// Emit element-wise initialization of the private copy of the array-like
/// reduction item \p N. With a declare-reduction decl the reduction operation
/// of the item is used as the initializer expression, otherwise the
/// initializer of the private variable.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto &Item = ClausesData[N];
  auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(Item.Private)->getDecl());
  const Expr *InitExpr = DRD ? Item.ReductionOp : PrivateVD->getInit();
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), InitExpr, DRD,
                       SharedLVal.getAddress());
}
|
|
|
|
|
|
|
|
/// Record one clause-data entry per reduction item, built from the elements
/// of \p Shareds, \p Privates and \p ReductionOps at the same position, and
/// pre-size the per-item containers. \p Privates and \p ReductionOps are read
/// once per element of \p Shareds.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  const auto NumItems = Shareds.size();
  ClausesData.reserve(NumItems);
  SharedAddresses.reserve(NumItems);
  Sizes.reserve(NumItems);
  BaseDecls.reserve(NumItems);

  auto PrivIt = Privates.begin();
  auto RedOpIt = ReductionOps.begin();
  for (auto SharedIt = Shareds.begin(), SharedEnd = Shareds.end();
       SharedIt != SharedEnd; ++SharedIt, ++PrivIt, ++RedOpIt)
    ClausesData.emplace_back(*SharedIt, *PrivIt, *RedOpIt);
}
|
|
|
|
|
|
|
|
/// Emit the lvalue of the shared reduction item \p N together with its
/// upper-bound lvalue (meaningful for array sections only) and append the
/// pair to SharedAddresses. Items must be processed in order: \p N has to be
/// equal to the number of pairs generated so far.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  // Both helpers may emit code. As two arguments of a single call their
  // relative evaluation order is unspecified, which would make the order of
  // the generated code depend on the host compiler; sequence them explicitly.
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}
|
|
|
|
|
|
|
|
/// Compute the size of reduction item \p N and append it to Sizes as a pair
/// (size in chars, number of elements). For items that are neither array
/// sections nor variably modified the second component is null. Otherwise the
/// element count is also bound to the size expression of the private
/// variable-length array type, and that type is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  const bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  const auto &Shared = SharedAddresses[N];

  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
    // Size is fully described by the type of the shared item.
    Sizes.emplace_back(
        CGF.getTypeSize(Shared.first.getType().getNonReferenceType()),
        nullptr);
    return;
  }

  llvm::Type *ElemType =
      cast<llvm::PointerType>(Shared.first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);

  llvm::Value *NumElems = nullptr;
  llvm::Value *SizeInChars = nullptr;
  if (AsArraySection) {
    // Element count is (upper bound - lower bound) + 1.
    NumElems = CGF.Builder.CreatePtrDiff(Shared.second.getPointer(),
                                         Shared.first.getPointer());
    NumElems = CGF.Builder.CreateNUWAdd(
        NumElems, llvm::ConstantInt::get(NumElems->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(NumElems, ElemSizeOf);
  } else {
    // Whole size is known from the type; derive the element count from it.
    SizeInChars =
        CGF.getTypeSize(Shared.first.getType().getNonReferenceType());
    NumElems = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, NumElems);

  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(NumElems));
  CGF.EmitVariablyModifiedType(PrivateType);
}
|
|
|
|
|
|
|
|
/// Emit the variably modified private type of reduction item \p N using the
/// already known element count \p Size. Nothing is emitted for items that are
/// neither array sections nor variably modified; for them \p Size is expected
/// to be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  const bool NeedsSizeBinding =
      isa<OMPArraySectionExpr>(ClausesData[N].Ref) ||
      PrivateType->isVariablyModifiedType();
  if (!NeedsSizeBinding) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified redution "
           "items.");
    return;
  }
  // Bind the element count to the size expression of the private VLA type
  // while the type is being emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
|
|
|
|
|
|
|
|
/// Emit initialization of the private copy of reduction item \p N.
///
/// Array sections and array-typed items are initialized element by element.
/// Other items use the declare-reduction initializer when one applies; failing
/// that, \p DefaultInit is invoked and, if it returns false and the private
/// variable has a non-trivial initializer, that initializer is emitted.
/// \param PrivateAddr Address of the private copy.
/// \param SharedLVal LValue of the original shared item.
/// \param DefaultInit Callback asked to perform default initialization;
/// returns true when it handled the initialization.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto &Item = ClausesData[N];
  auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(Item.Private)->getDecl());
  auto *DRD = getReductionInit(Item.ReductionOp);

  // Give both addresses the memory types of the private and shared items.
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  Address SharedAddr = CGF.Builder.CreateElementBitCast(
      SharedLVal.getAddress(), CGF.ConvertTypeForMem(SharedType));
  SharedLVal = CGF.MakeAddrLValue(SharedAddr, SharedType,
                                  SharedAddresses[N].first.getBaseInfo());

  if (isa<OMPArraySectionExpr>(Item.Ref) ||
      CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
    return;
  }

  if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, Item.ReductionOp, PrivateAddr,
                                     SharedLVal.getAddress(),
                                     SharedLVal.getType());
    return;
  }

  // The default-init callback runs first; the variable's own initializer is
  // only a fallback.
  if (DefaultInit(CGF))
    return;
  if (PrivateVD->hasInit() && !CGF.isTrivialInitializer(PrivateVD->getInit()))
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
}
|
|
|
|
|
|
|
|
bool ReductionCodeGen::needCleanups(unsigned N) {
|
|
|
|
auto *PrivateVD =
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
|
|
|
|
QualType PrivateType = PrivateVD->getType();
|
|
|
|
QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
|
|
|
|
return DTorKind != QualType::DK_none;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Pushes a destroy cleanup for the private copy of the N-th reduction item
/// when its type requires destruction; does nothing otherwise.
///
/// \param CGF         Function being emitted.
/// \param N           Index of the reduction item in ClausesData.
/// \param PrivateAddr Address of the private copy; re-cast to the memory type
///                    of the private variable before the cleanup is pushed.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivVar =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const QualType PrivTy = PrivVar->getType();
  const QualType::DestructionKind Kind = PrivTy.isDestructedType();
  if (!needCleanups(N))
    return;

  Address TypedAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivTy));
  CGF.pushDestroy(Kind, TypedAddr, PrivTy);
}
|
|
|
|
|
|
|
|
/// Walks through the pointer/reference levels of \p BaseTy, loading through
/// each one, until a type equal to \p ElTy (or a non-pointer, non-reference
/// type) is reached. Returns an lvalue for the resulting address, re-cast to
/// the memory type of \p ElTy.
///
/// \param CGF    Function being emitted.
/// \param BaseTy Declared type of the base; a top-level reference is stripped
///               before the walk starts.
/// \param ElTy   Type at which the walk stops.
/// \param BaseLV LValue of the base to start loading from.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  ASTContext &Ctx = CGF.getContext();
  for (BaseTy = BaseTy.getNonReferenceType();
       (BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
       !Ctx.hasSameType(BaseTy, ElTy);
       BaseTy = BaseTy->getPointeeType()) {
    const Address Current = BaseLV.getAddress();
    if (const auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(Current, PtrTy);
    else
      BaseLV = CGF.EmitLoadOfReferenceLValue(Current,
                                             BaseTy->castAs<ReferenceType>());
  }

  // The final address keeps the type and base info of the last loaded lvalue,
  // but its element type is that of ElTy.
  Address Begin = CGF.Builder.CreateElementBitCast(
      BaseLV.getAddress(), CGF.ConvertTypeForMem(ElTy));
  return CGF.MakeAddrLValue(Begin, BaseLV.getType(), BaseLV.getBaseInfo());
}
|
|
|
|
|
|
|
|
/// Wraps \p Addr in one memory temporary per pointer/reference level of
/// \p BaseTy (down to \p ElTy), chaining each temporary into the previous
/// one, and returns the outermost temporary. When \p BaseTy has no such
/// level, \p Addr is simply cast to \p BaseLVType and returned with
/// \p BaseLVAlignment.
///
/// \param CGF             Function being emitted.
/// \param BaseTy          Declared type of the base; a top-level reference is
///                        stripped first.
/// \param ElTy            Type at which the level walk stops.
/// \param BaseLVType      LLVM type used for the cast when no temporary is
///                        created.
/// \param BaseLVAlignment Alignment of the returned address when no temporary
///                        is created.
/// \param Addr            Pointer value to store into the innermost level.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Innermost = Address::invalid();
  Address Previous = Address::invalid();
  Address Outermost = Address::invalid();

  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Innermost = CGF.CreateMemTemp(BaseTy);
    // The first temporary is the one handed back to the caller; every later
    // one is stored into its predecessor.
    if (!Previous.isValid())
      Outermost = Innermost;
    else
      CGF.Builder.CreateStore(Innermost.getPointer(), Previous);
    Previous = Innermost;
    BaseTy = BaseTy->getPointeeType();
  }

  if (!Innermost.isValid()) {
    // No indirection level: return the pointer itself with the base's type.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, BaseLVType);
    return Address(Addr, BaseLVAlignment);
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, Innermost.getElementType());
  CGF.Builder.CreateStore(Addr, Innermost);
  return Outermost;
}
|
|
|
|
|
|
|
|
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
|
|
|
|
Address PrivateAddr) {
|
|
|
|
const DeclRefExpr *DE;
|
|
|
|
const VarDecl *OrigVD = nullptr;
|
|
|
|
if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) {
|
|
|
|
auto *Base = OASE->getBase()->IgnoreParenImpCasts();
|
|
|
|
while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
|
|
|
|
Base = TempOASE->getBase()->IgnoreParenImpCasts();
|
|
|
|
while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
|
|
|
|
Base = TempASE->getBase()->IgnoreParenImpCasts();
|
|
|
|
DE = cast<DeclRefExpr>(Base);
|
|
|
|
OrigVD = cast<VarDecl>(DE->getDecl());
|
|
|
|
} else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) {
|
|
|
|
auto *Base = ASE->getBase()->IgnoreParenImpCasts();
|
|
|
|
while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
|
|
|
|
Base = TempASE->getBase()->IgnoreParenImpCasts();
|
|
|
|
DE = cast<DeclRefExpr>(Base);
|
|
|
|
OrigVD = cast<VarDecl>(DE->getDecl());
|
|
|
|
}
|
|
|
|
if (OrigVD) {
|
|
|
|
BaseDecls.emplace_back(OrigVD);
|
|
|
|
auto OriginalBaseLValue = CGF.EmitLValue(DE);
|
|
|
|
LValue BaseLValue =
|
|
|
|
loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
|
|
|
|
OriginalBaseLValue);
|
|
|
|
llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
|
|
|
|
BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
|
|
|
|
llvm::Value *Ptr =
|
|
|
|
CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment);
|
|
|
|
return castToBase(CGF, OrigVD->getType(),
|
|
|
|
SharedAddresses[N].first.getType(),
|
|
|
|
OriginalBaseLValue.getPointer()->getType(),
|
|
|
|
OriginalBaseLValue.getAlignment(), Ptr);
|
|
|
|
}
|
|
|
|
BaseDecls.emplace_back(
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
|
|
|
|
return PrivateAddr;
|
|
|
|
}
|
|
|
|
|
2017-07-17 21:30:36 +08:00
|
|
|
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
|
|
|
|
auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
|
|
|
|
return DRD && DRD->getInitializer();
|
|
|
|
}
|
|
|
|
|
2014-10-10 20:19:54 +08:00
|
|
|
/// Returns the lvalue obtained by loading through the thread id variable,
/// which is treated here as a pointer-typed local variable.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  const VarDecl *ThreadIDVar = getThreadIDVariable();
  Address VarAddr = CGF.GetAddrOfLocalVar(ThreadIDVar);
  const auto *PtrTy = ThreadIDVar->getType()->castAs<PointerType>();
  return CGF.EmitLoadOfPointerLValue(VarAddr, PtrTy);
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
/// Emits the region body by invoking CodeGen inside a terminate scope.
/// Nothing is emitted when CGF has no insertion point. The statement argument
/// is unused.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (CGF.HaveInsertPoint()) {
    // OpenMP "1.2.2 OpenMP Language Terminology": a structured block is an
    // executable statement with a single entry at the top and a single exit
    // at the bottom. The point of exit cannot be a branch out of the
    // structured block, and longjmp() and throw() must not violate the
    // entry/exit criteria -- hence the terminate scope around the body.
    CGF.EHStack.pushTerminate();
    CodeGen(CGF);
    CGF.EHStack.popTerminate();
  }
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
/// Returns an lvalue that refers directly to the thread id local variable
/// (no load through a pointer), using the variable's own type and
/// declaration-based alignment info.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  const VarDecl *ThreadIDVar = getThreadIDVariable();
  Address VarAddr = CGF.GetAddrOfLocalVar(ThreadIDVar);
  return CGF.MakeAddrLValue(VarAddr, ThreadIDVar->getType(),
                            LValueBaseInfo(AlignmentSource::Decl, false));
}
|
|
|
|
|
2014-05-06 18:08:46 +08:00
|
|
|
/// Constructs the OpenMP runtime helper for \p CGM: creates the ident_t
/// struct type and the critical-name array type, then loads the offload
/// info metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OffloadEntriesInfoManager(CGM) {
  auto *I32Ty = CGM.Int32Ty;
  // ident_t layout: four 32-bit integers followed by one i8* field.
  IdentTy = llvm::StructType::create("ident_t",
                                     I32Ty /* reserved_1 */,
                                     I32Ty /* flags */,
                                     I32Ty /* reserved_2 */,
                                     I32Ty /* reserved_3 */,
                                     CGM.Int8PtrTy /* psource */);
  // The critical name type is an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(I32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
|
|
|
|
|
2015-03-18 12:13:55 +08:00
|
|
|
/// Drops every entry recorded in InternalVars.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}
|
|
|
|
|
2016-03-04 17:22:22 +08:00
|
|
|
/// Emits an internal, always-inline helper function for a user-defined
/// reduction: ".omp_combiner." when \p IsCombiner is true, otherwise
/// ".omp_initializer.".
///
/// The helper has the shape
///   void helper(Ty *restrict out_parm, Ty *restrict in_parm);
/// i.e. the parameter bound to \p Out comes first and the one bound to \p In
/// second. Inside the helper, \p In and \p Out are privatized so that they
/// refer to the pointees of their parameters, and \p CombinerInitializer is
/// emitted as an ignored expression.
///
/// \param CGM                 Module the function is added to.
/// \param Ty                  Type the parameters point to.
/// \param CombinerInitializer Expression forming the helper's body.
/// \param In                  Variable mapped to the second parameter.
/// \param Out                 Variable mapped to the first parameter.
/// \param IsCombiner          Selects the name of the emitted function.
/// \returns The emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  ASTContext &Ctx = CGM.getContext();
  QualType PtrTy = Ctx.getPointerType(Ty).withRestrict();

  // Build the parameter list: out first, then in.
  ImplicitParamDecl OmpOutParm(Ctx, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(Ctx, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);

  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  const char *FnName = IsCombiner ? ".omp_combiner." : ".omp_initializer.";
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    FnName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
  // Strip attributes that would conflict with alwaysinline before adding it.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);

  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, FnInfo, Args);

  // Map the "in" variable to "*in_parm" and the "out" variable to "*out_parm"
  // in all expressions of the body. The "in" mapping is set up first.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *ParmPtrTy = PtrTy->castAs<PointerType>();
  auto LoadPointee = [&CGF, ParmPtrTy](Address ParmAddr) -> Address {
    return CGF.EmitLoadOfPointerLValue(ParmAddr, ParmPtrTy).getAddress();
  };
  Address InParmAddr = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&LoadPointee, InParmAddr]() -> Address {
    return LoadPointee(InParmAddr);
  });
  Address OutParmAddr = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&LoadPointee, OutParmAddr]() -> Address {
    return LoadPointee(OutParmAddr);
  });
  (void)Scope.Privatize();

  CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
|
|
|
|
|
|
|
|
/// Emits the combiner (and, when the declaration has one, the initializer)
/// helper functions for the declared reduction \p D and caches them in
/// UDRMap. Does nothing if \p D is already cached.
///
/// \param CGF Optional function in whose context the reduction is emitted;
///            when non-null, \p D is also appended to the FunctionUDRMap
///            entry for CGF->CurFn.
/// \param D   Declared reduction to emit.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;

  ASTContext &Ctx = CGM.getContext();
  // Looks up the variable with the given identifier declared inside D.
  auto LookupVar = [D](const IdentifierInfo *Id) {
    return cast<VarDecl>(D->lookup(Id).front());
  };

  // The identifiers are resolved once and then reused.
  if (!In || !Out) {
    In = &Ctx.Idents.get("omp_in");
    Out = &Ctx.Idents.get("omp_out");
  }
  llvm::Function *Combiner =
      emitCombinerOrInitializer(CGM, D->getType(), D->getCombiner(),
                                LookupVar(In), LookupVar(Out),
                                /*IsCombiner=*/true);

  llvm::Function *Initializer = nullptr;
  if (auto *Init = D->getInitializer()) {
    if (!Priv || !Orig) {
      Priv = &Ctx.Idents.get("omp_priv");
      Orig = &Ctx.Idents.get("omp_orig");
    }
    // For the initializer, omp_orig plays the "in" role and omp_priv the
    // "out" role.
    Initializer =
        emitCombinerOrInitializer(CGM, D->getType(), Init, LookupVar(Orig),
                                  LookupVar(Priv),
                                  /*IsCombiner=*/false);
  }

  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
  if (CGF)
    FunctionUDRMap.FindAndConstruct(CGF->CurFn).second.push_back(D);
}
|
|
|
|
|
2016-03-17 18:19:46 +08:00
|
|
|
/// Returns the (combiner, initializer) function pair cached for \p D,
/// emitting the functions first (without a function context) if \p D has not
/// been seen yet.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto Found = UDRMap.find(D);
  if (Found == UDRMap.end()) {
    emitUserDefinedReduction(/*CGF=*/nullptr, D);
    return UDRMap.lookup(D);
  }
  return Found->second;
}
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
// Layout information for ident_t.
|
|
|
|
/// Returns the alignment used for ident_t, which is the pointer alignment of
/// the module.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  const CharUnits PtrAlign = CGM.getPointerAlign();
  return PtrAlign;
}
|
|
|
|
/// Returns the size of ident_t, computed as the 16 bytes occupied by its
/// four 32-bit integer fields plus the size of the trailing pointer field.
static CharUnits getIdentSize(CodeGenModule &CGM) {
  // The sum below is only the real struct size if the pointer field starts
  // immediately after the integer fields, i.e. if byte offset 16 is suitably
  // aligned for a pointer. Check exactly that assumption.
  assert(CharUnits::fromQuantity(16).isMultipleOf(CGM.getPointerAlign()) &&
         "unexpected padding before the pointer field of ident_t");
  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
|
2016-02-19 18:38:26 +08:00
|
|
|
/// Returns the byte offset of the given ident_t field, computed as the field
/// index times four bytes.
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
  // Every field except the last one is an i32, so each field's offset is its
  // index multiplied by the 4-byte field size.
  const CharUnits I32Size = CharUnits::fromQuantity(4);
  return unsigned(Field) * I32Size;
}
|
|
|
|
/// Creates a struct GEP addressing field \p Field of the ident_t object at
/// \p Addr.
///
/// \param CGF   Function being emitted.
/// \param Addr  Address of the ident_t object.
/// \param Field Index of the field to address.
/// \param Name  Optional name for the resulting value.
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
                                   IdentFieldIndex Field,
                                   const llvm::Twine &Name = "") {
  return CGF.Builder.CreateStructGEP(Addr, Field, getOffsetOfIdentField(Field),
                                     Name);
}
|
|
|
|
|
2017-01-19 02:18:53 +08:00
|
|
|
/// Generates the outlined function for the captured statement \p CS of a
/// parallel or teams region.
///
/// \param CGM                Module the function is emitted into.
/// \param D                  Directive the region belongs to; inspected only
///                           to find out whether it has a cancel region.
/// \param CS                 Captured statement to outline.
/// \param ThreadIDVar        Thread id variable; must have pointer type.
/// \param InnermostKind      Directive kind passed on to the region info.
/// \param OutlinedHelperName Helper name passed on to the region info.
/// \param CodeGen            Code generation callback for the region body.
/// \returns The generated outlined function.
static llvm::Value *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);

  // Only the parallel, parallel sections and parallel for directives are
  // queried for a cancel region; anything else is treated as having none.
  const bool HasCancel = [&D]() {
    if (const auto *Par = dyn_cast<OMPParallelDirective>(&D))
      return Par->hasCancel();
    if (const auto *ParSections = dyn_cast<OMPParallelSectionsDirective>(&D))
      return ParSections->hasCancel();
    if (const auto *ParFor = dyn_cast<OMPParallelForDirective>(&D))
      return ParFor->hasCancel();
    return false;
  }();

  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
|
|
|
|
|
2017-01-19 02:18:53 +08:00
|
|
|
/// Emits the outlined function for the OMPD_parallel captured statement of
/// directive \p D, using the runtime's outlined helper name.
llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, D.getCapturedStmt(OMPD_parallel), ThreadIDVar, InnermostKind,
      getOutlinedHelperName(), CodeGen);
}
|
|
|
|
|
|
|
|
/// Emits the outlined function for the OMPD_teams captured statement of
/// directive \p D, using the runtime's outlined helper name.
llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, D.getCapturedStmt(OMPD_teams), ThreadIDVar, InnermostKind,
      getOutlinedHelperName(), CodeGen);
}
|
|
|
|
|
2015-07-03 17:56:58 +08:00
|
|
|
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
|
|
|
|
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
|
2016-04-20 12:01:36 +08:00
|
|
|
const VarDecl *PartIDVar, const VarDecl *TaskTVar,
|
|
|
|
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
|
|
|
|
bool Tied, unsigned &NumberOfParts) {
|
|
|
|
auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
|
|
|
|
PrePostActionTy &) {
|
|
|
|
auto *ThreadID = getThreadID(CGF, D.getLocStart());
|
|
|
|
auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
|
|
|
|
llvm::Value *TaskArgs[] = {
|
|
|
|
UpLoc, ThreadID,
|
|
|
|
CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
|
|
|
|
TaskTVar->getType()->castAs<PointerType>())
|
|
|
|
.getPointer()};
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
|
|
|
|
};
|
|
|
|
CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
|
|
|
|
UntiedCodeGen);
|
|
|
|
CodeGen.setAction(Action);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
assert(!ThreadIDVar->getType()->isPointerType() &&
|
|
|
|
"thread id variable must be of type kmp_int32 for tasks");
|
|
|
|
auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
auto *TD = dyn_cast<OMPTaskDirective>(&D);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
CodeGenFunction CGF(CGM, true);
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
|
|
|
|
InnermostKind,
|
|
|
|
TD ? TD->hasCancel() : false, Action);
|
2015-06-24 11:35:38 +08:00
|
|
|
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
|
2016-04-20 12:01:36 +08:00
|
|
|
auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
|
|
|
|
if (!Tied)
|
|
|
|
NumberOfParts = Action.getNumberOfParts();
|
|
|
|
return Res;
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
}
|
|
|
|
|
2016-02-19 18:38:26 +08:00
|
|
|
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CharUnits Align = getIdentAlign(CGM);
|
2014-05-07 14:18:01 +08:00
|
|
|
llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
|
2014-05-06 18:08:46 +08:00
|
|
|
if (!Entry) {
|
|
|
|
if (!DefaultOpenMPPSource) {
|
|
|
|
// Initialize default location for psource field of ident_t structure of
|
|
|
|
// all ident_t objects. Format is ";file;function;line;column;;".
|
|
|
|
// Taken from
|
|
|
|
// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
|
|
|
|
DefaultOpenMPPSource =
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
|
2014-05-06 18:08:46 +08:00
|
|
|
DefaultOpenMPPSource =
|
|
|
|
llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
|
|
|
|
}
|
2016-11-19 16:17:24 +08:00
|
|
|
|
2016-11-29 06:18:27 +08:00
|
|
|
ConstantInitBuilder builder(CGM);
|
2016-11-19 16:17:24 +08:00
|
|
|
auto fields = builder.beginStruct(IdentTy);
|
|
|
|
fields.addInt(CGM.Int32Ty, 0);
|
|
|
|
fields.addInt(CGM.Int32Ty, Flags);
|
|
|
|
fields.addInt(CGM.Int32Ty, 0);
|
|
|
|
fields.addInt(CGM.Int32Ty, 0);
|
|
|
|
fields.add(DefaultOpenMPPSource);
|
|
|
|
auto DefaultOpenMPLocation =
|
|
|
|
fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
|
|
|
|
llvm::GlobalValue::PrivateLinkage);
|
2016-06-15 05:02:05 +08:00
|
|
|
DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
|
2016-11-19 16:17:24 +08:00
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
return Address(Entry, Align);
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
|
|
|
|
SourceLocation Loc,
|
2016-02-19 18:38:26 +08:00
|
|
|
unsigned Flags) {
|
|
|
|
Flags |= OMP_IDENT_KMPC;
|
2014-05-06 18:08:46 +08:00
|
|
|
// If no debug info is generated - return global default location.
|
2016-02-02 19:06:51 +08:00
|
|
|
if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
|
2014-05-06 18:08:46 +08:00
|
|
|
Loc.isInvalid())
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
return getOrCreateDefaultLocation(Flags).getPointer();
|
2014-05-06 18:08:46 +08:00
|
|
|
|
|
|
|
assert(CGF.CurFn && "No function in current CodeGenFunction.");
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address LocValue = Address::invalid();
|
2014-12-03 20:11:24 +08:00
|
|
|
auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
|
|
|
|
if (I != OpenMPLocThreadIDMap.end())
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
|
|
|
|
|
2014-12-15 15:07:06 +08:00
|
|
|
// OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
|
|
|
|
// GetOpenMPThreadID was called before this routine.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
if (!LocValue.isValid()) {
|
2014-05-07 14:18:01 +08:00
|
|
|
// Generate "ident_t .kmpc_loc.addr;"
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
|
|
|
|
".kmpc_loc.addr");
|
2014-10-10 20:19:54 +08:00
|
|
|
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Elem.second.DebugLoc = AI.getPointer();
|
2014-05-06 18:08:46 +08:00
|
|
|
LocValue = AI;
|
|
|
|
|
|
|
|
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
|
|
|
|
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
|
2015-02-25 16:32:46 +08:00
|
|
|
CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGM.getSize(getIdentSize(CGF.CGM)));
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// char **psource = &.kmpc_loc_<flags>.addr.psource;
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
|
2014-05-06 18:08:46 +08:00
|
|
|
|
2014-05-30 13:48:40 +08:00
|
|
|
auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
|
|
|
|
if (OMPDebugLoc == nullptr) {
|
|
|
|
SmallString<128> Buffer2;
|
|
|
|
llvm::raw_svector_ostream OS2(Buffer2);
|
|
|
|
// Build debug location
|
|
|
|
PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
|
|
|
|
OS2 << ";" << PLoc.getFilename() << ";";
|
|
|
|
if (const FunctionDecl *FD =
|
|
|
|
dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
|
|
|
|
OS2 << FD->getQualifiedNameAsString();
|
|
|
|
}
|
|
|
|
OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
|
|
|
|
OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
|
|
|
|
OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
|
|
|
// *psource = ";<File>;<Function>;<Line>;<Column>;;";
|
2014-05-30 13:48:40 +08:00
|
|
|
CGF.Builder.CreateStore(OMPDebugLoc, PSource);
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
// Our callers always pass this to a runtime function, so for
|
|
|
|
// convenience, go ahead and return a naked pointer.
|
|
|
|
return LocValue.getPointer();
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// Return the OpenMP global thread id value to use in the function that
/// \p CGF is currently emitting.
///
/// The value is taken, in order of preference, from:
///  1. the per-function cache in OpenMPLocThreadIDMap;
///  2. the thread id variable of the enclosing OpenMP region, when the
///     captured-statement info is a CGOpenMPRegionInfo that provides one;
///  3. a call to __kmpc_global_thread_num emitted at the alloca insertion
///     point.
///
/// \param CGF Code generation state for the current function.
/// \param Loc Source location used for the load / the runtime call.
/// \return The llvm::Value holding the thread id.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Fast path: a thread id was already recorded for this function.
  auto CachedIt = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (CachedIt != OpenMPLocThreadIDMap.end()) {
    if (llvm::Value *CachedTID = CachedIt->second.ThreadID)
      return CachedTID;
  }

  // Outlined function with the thread id passed as an argument: load it from
  // the region's thread id variable.
  auto *RegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (RegionInfo && RegionInfo->getThreadIDVariable()) {
    auto TIDLVal = RegionInfo->getThreadIDVariableLValue(CGF);
    llvm::Value *LoadedTID =
        CGF.EmitLoadOfLValue(TIDLVal, Loc).getScalarVal();
    // The loaded value is cached only when the load was emitted into the
    // entry block (the block that contains AllocaInsertPt); only then is it
    // reused everywhere in the function.
    if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent())
      OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn).second.ThreadID =
          LoadedTID;
    return LoadedTID;
  }

  // Not an outlined function region: emit
  //   kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  // at the alloca insertion point and cache the result for use across the
  // function. The guard restores the builder's insertion point on exit.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
  llvm::Value *RuntimeTID =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                          emitUpdateLocation(CGF, Loc));
  OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn).second.ThreadID =
      RuntimeTID;
  return RuntimeTID;
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// Drop the per-function state kept for the function \p CGF has just
/// finished emitting.
///
/// Removes the function's entry from OpenMPLocThreadIDMap, and, if the
/// function has an entry in FunctionUDRMap, erases every declaration listed
/// there from UDRMap before removing that entry as well.
///
/// \param CGF Code generation state whose CurFn identifies the function.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Erasing by key does nothing when the key is absent, so a preceding
  // count() would only add a second lookup.
  OpenMPLocThreadIDMap.erase(CGF.CurFn);

  // One find() replaces the former count() + operator[] + erase(key) chain.
  auto UDRIt = FunctionUDRMap.find(CGF.CurFn);
  if (UDRIt != FunctionUDRMap.end()) {
    for (auto *D : UDRIt->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(UDRIt);
  }
}
|
|
|
|
|
|
|
|
/// Return the unqualified pointer type to IdentTy.
///
/// This function does not create IdentTy; it must already be set by the time
/// this is called (where that happens is outside this block).
///
/// \return llvm::PointerType::getUnqual(IdentTy).
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  // The previous `if (!IdentTy) { }` branch was empty, so a null IdentTy went
  // straight into getUnqual(). Make the precondition explicit instead.
  assert(IdentTy && "IdentTy must be initialized before requesting its "
                    "pointer type.");
  return llvm::PointerType::getUnqual(IdentTy);
}
|
|
|
|
|
|
|
|
/// Return the unqualified pointer type to the kmpc_micro function type,
/// creating and caching that function type in Kmpc_MicroTy on first use.
///
/// The function type built here is
///   void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...)
///
/// \return llvm::PointerType::getUnqual(Kmpc_MicroTy).
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  // Already built on an earlier call: just hand out the pointer type.
  if (Kmpc_MicroTy)
    return llvm::PointerType::getUnqual(Kmpc_MicroTy);

  // Both fixed parameters are pointers to a 32-bit integer; the function is
  // variadic and returns void.
  llvm::Type *Int32PtrTy = llvm::PointerType::getUnqual(CGM.Int32Ty);
  llvm::Type *MicroParams[] = {Int32PtrTy, Int32PtrTy};
  Kmpc_MicroTy =
      llvm::FunctionType::get(CGM.VoidTy, MicroParams, /*isVarArg=*/true);
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
|
|
|
|
|
|
|
|
llvm::Constant *
|
2016-02-19 18:38:26 +08:00
|
|
|
CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
|
2014-05-06 18:08:46 +08:00
|
|
|
llvm::Constant *RTLFn = nullptr;
|
2016-02-19 18:38:26 +08:00
|
|
|
switch (static_cast<OpenMPRTLFunction>(Function)) {
|
2014-05-06 18:08:46 +08:00
|
|
|
case OMPRTL__kmpc_fork_call: {
|
|
|
|
// Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
|
|
|
|
// microtask, ...);
|
2014-06-18 15:08:49 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
getKmpc_MicroPointerTy()};
|
2014-05-06 18:08:46 +08:00
|
|
|
llvm::FunctionType *FnTy =
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
|
2014-05-06 18:08:46 +08:00
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_global_thread_num: {
|
|
|
|
// Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
|
2014-06-18 15:08:49 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
|
2014-05-06 18:08:46 +08:00
|
|
|
llvm::FunctionType *FnTy =
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
2014-05-06 18:08:46 +08:00
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
|
|
|
|
break;
|
|
|
|
}
|
2014-11-11 12:05:39 +08:00
|
|
|
case OMPRTL__kmpc_threadprivate_cached: {
|
|
|
|
// Build void *__kmpc_threadprivate_cached(ident_t *loc,
|
|
|
|
// kmp_int32 global_tid, void *data, size_t size, void ***cache);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy, CGM.SizeTy,
|
|
|
|
CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
|
|
|
|
break;
|
|
|
|
}
|
2014-09-22 18:01:53 +08:00
|
|
|
case OMPRTL__kmpc_critical: {
|
2014-09-22 20:32:31 +08:00
|
|
|
// Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_critical_name *crit);
|
2014-09-22 18:01:53 +08:00
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
|
|
|
|
break;
|
|
|
|
}
|
2015-12-15 18:55:09 +08:00
|
|
|
case OMPRTL__kmpc_critical_with_hint: {
|
|
|
|
// Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_critical_name *crit, uintptr_t hint);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy),
|
|
|
|
CGM.IntPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
|
|
|
|
break;
|
|
|
|
}
|
2014-11-11 12:05:39 +08:00
|
|
|
case OMPRTL__kmpc_threadprivate_register: {
|
|
|
|
// Build void __kmpc_threadprivate_register(ident_t *, void *data,
|
|
|
|
// kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
|
|
|
|
// typedef void *(*kmpc_ctor)(void *);
|
|
|
|
auto KmpcCtorTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
|
|
|
|
/*isVarArg*/ false)->getPointerTo();
|
|
|
|
// typedef void *(*kmpc_cctor)(void *, void *);
|
|
|
|
llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
|
|
|
|
auto KmpcCopyCtorTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
|
|
|
|
/*isVarArg*/ false)->getPointerTo();
|
|
|
|
// typedef void (*kmpc_dtor)(void *);
|
|
|
|
auto KmpcDtorTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
|
|
|
|
->getPointerTo();
|
|
|
|
llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
|
|
|
|
KmpcCopyCtorTy, KmpcDtorTy};
|
|
|
|
auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
|
|
|
|
/*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
|
|
|
|
break;
|
|
|
|
}
|
2014-09-22 18:01:53 +08:00
|
|
|
case OMPRTL__kmpc_end_critical: {
|
2014-09-22 20:32:31 +08:00
|
|
|
// Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_critical_name *crit);
|
2014-09-22 18:01:53 +08:00
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
|
|
|
|
break;
|
|
|
|
}
|
2014-12-05 12:09:23 +08:00
|
|
|
case OMPRTL__kmpc_cancel_barrier: {
|
|
|
|
// Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
|
|
|
|
// global_tid);
|
2014-10-08 22:01:46 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
2014-12-05 12:09:23 +08:00
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
|
2014-10-08 22:01:46 +08:00
|
|
|
break;
|
|
|
|
}
|
2015-07-03 17:56:58 +08:00
|
|
|
case OMPRTL__kmpc_barrier: {
|
2015-07-06 13:50:32 +08:00
|
|
|
// Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
|
2015-07-03 17:56:58 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
|
|
|
|
break;
|
|
|
|
}
|
2014-12-15 15:07:06 +08:00
|
|
|
case OMPRTL__kmpc_for_static_fini: {
|
|
|
|
// Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
|
|
|
|
break;
|
|
|
|
}
|
2014-10-13 16:23:51 +08:00
|
|
|
case OMPRTL__kmpc_push_num_threads: {
|
|
|
|
// Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_int32 num_threads)
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
case OMPRTL__kmpc_serialized_parallel: {
|
|
|
|
// Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
|
|
|
|
// global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_serialized_parallel: {
|
|
|
|
// Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
|
|
|
|
// global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
|
|
|
|
break;
|
|
|
|
}
|
2014-11-20 12:34:54 +08:00
|
|
|
case OMPRTL__kmpc_flush: {
|
2015-02-24 20:55:09 +08:00
|
|
|
// Build void __kmpc_flush(ident_t *loc);
|
2014-11-20 12:34:54 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
|
|
|
|
llvm::FunctionType *FnTy =
|
2015-02-24 20:55:09 +08:00
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
2014-11-20 12:34:54 +08:00
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
|
|
|
|
break;
|
|
|
|
}
|
2014-12-04 15:23:53 +08:00
|
|
|
case OMPRTL__kmpc_master: {
|
|
|
|
// Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_master: {
|
|
|
|
// Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
|
|
|
|
break;
|
|
|
|
}
|
2015-02-05 13:57:51 +08:00
|
|
|
case OMPRTL__kmpc_omp_taskyield: {
|
|
|
|
// Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
|
|
|
|
// int end_part);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
|
|
|
|
break;
|
|
|
|
}
|
2015-02-05 14:35:41 +08:00
|
|
|
case OMPRTL__kmpc_single: {
|
|
|
|
// Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_single: {
|
|
|
|
// Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
case OMPRTL__kmpc_omp_task_alloc: {
|
|
|
|
// Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
|
|
|
|
// kmp_routine_entry_t *task_entry);
|
|
|
|
assert(KmpRoutineEntryPtrTy != nullptr &&
|
|
|
|
"Type kmp_routine_entry_t must be created.");
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
|
|
|
|
CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
|
|
|
|
// Return void * and then cast to particular kmp_task_t type.
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_omp_task: {
|
|
|
|
// Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
|
|
|
|
// *new_task);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
|
|
|
|
break;
|
|
|
|
}
|
2015-03-23 14:18:07 +08:00
|
|
|
case OMPRTL__kmpc_copyprivate: {
|
|
|
|
// Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
|
2015-04-30 11:47:32 +08:00
|
|
|
// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
|
2015-03-23 14:18:07 +08:00
|
|
|
// kmp_int32 didit);
|
|
|
|
llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
|
|
|
|
auto *CpyFnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
|
2015-04-30 11:47:32 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
|
2015-03-23 14:18:07 +08:00
|
|
|
CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
|
|
|
|
CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
case OMPRTL__kmpc_reduce: {
|
|
|
|
// Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
|
|
|
|
// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
|
|
|
|
llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
|
|
|
|
auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
|
|
|
|
/*isVarArg=*/false);
|
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
|
|
|
|
CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_reduce_nowait: {
|
|
|
|
// Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
|
|
|
|
// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
|
|
|
|
// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
|
|
|
|
// *lck);
|
|
|
|
llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
|
|
|
|
auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
|
|
|
|
/*isVarArg=*/false);
|
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
|
|
|
|
CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_reduce: {
|
|
|
|
// Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_critical_name *lck);
|
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_reduce_nowait: {
|
|
|
|
// Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_critical_name *lck);
|
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn =
|
|
|
|
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
case OMPRTL__kmpc_omp_task_begin_if0: {
|
|
|
|
// Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
|
|
|
|
// *new_task);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn =
|
|
|
|
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_omp_task_complete_if0: {
|
|
|
|
// Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
|
|
|
|
// *new_task);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy,
|
|
|
|
/*Name=*/"__kmpc_omp_task_complete_if0");
|
|
|
|
break;
|
|
|
|
}
|
2015-04-22 19:15:40 +08:00
|
|
|
case OMPRTL__kmpc_ordered: {
|
|
|
|
// Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_ordered: {
|
2015-06-18 20:14:09 +08:00
|
|
|
// Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
|
2015-04-22 19:15:40 +08:00
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
|
|
|
|
break;
|
|
|
|
}
|
2015-04-27 13:22:09 +08:00
|
|
|
case OMPRTL__kmpc_omp_taskwait: {
|
|
|
|
// Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
|
|
|
|
break;
|
|
|
|
}
|
2015-06-18 20:14:09 +08:00
|
|
|
case OMPRTL__kmpc_taskgroup: {
|
|
|
|
// Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_end_taskgroup: {
|
|
|
|
// Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
|
|
|
|
break;
|
|
|
|
}
|
2015-06-18 21:40:03 +08:00
|
|
|
case OMPRTL__kmpc_push_proc_bind: {
|
|
|
|
// Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// int proc_bind)
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
case OMPRTL__kmpc_omp_task_with_deps: {
|
|
|
|
// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
|
|
|
|
// kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
|
|
|
|
llvm::Type *TypeParams[] = {
|
|
|
|
getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn =
|
|
|
|
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_omp_wait_deps: {
|
|
|
|
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
|
|
|
|
// kmp_depend_info_t *noalias_dep_list);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.Int32Ty, CGM.VoidPtrTy,
|
|
|
|
CGM.Int32Ty, CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
|
|
|
|
break;
|
|
|
|
}
|
2015-07-02 12:17:07 +08:00
|
|
|
case OMPRTL__kmpc_cancellationpoint: {
|
|
|
|
// Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
|
|
|
|
// global_tid, kmp_int32 cncl_kind)
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
|
|
|
|
break;
|
|
|
|
}
|
2015-07-06 13:50:32 +08:00
|
|
|
case OMPRTL__kmpc_cancel: {
|
|
|
|
// Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_int32 cncl_kind)
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
|
|
|
|
break;
|
|
|
|
}
|
2016-03-04 04:34:23 +08:00
|
|
|
case OMPRTL__kmpc_push_num_teams: {
|
|
|
|
// Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
|
|
|
|
// kmp_int32 num_teams, kmp_int32 num_threads)
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
|
|
|
|
CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_fork_teams: {
|
|
|
|
// Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
|
|
|
|
// microtask, ...);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
getKmpc_MicroPointerTy()};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
case OMPRTL__kmpc_taskloop: {
|
|
|
|
// Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
|
|
|
|
// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
|
|
|
|
// sched, kmp_uint64 grainsize, void *task_dup);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
|
|
|
|
CGM.IntTy,
|
|
|
|
CGM.VoidPtrTy,
|
|
|
|
CGM.IntTy,
|
|
|
|
CGM.Int64Ty->getPointerTo(),
|
|
|
|
CGM.Int64Ty->getPointerTo(),
|
|
|
|
CGM.Int64Ty,
|
|
|
|
CGM.IntTy,
|
|
|
|
CGM.IntTy,
|
|
|
|
CGM.Int64Ty,
|
|
|
|
CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
|
|
|
|
break;
|
|
|
|
}
|
2016-05-25 20:36:08 +08:00
|
|
|
case OMPRTL__kmpc_doacross_init: {
|
|
|
|
// Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
|
|
|
|
// num_dims, struct kmp_dim *dims);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
|
|
|
|
CGM.Int32Ty,
|
|
|
|
CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_doacross_fini: {
|
|
|
|
// Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_doacross_post: {
|
|
|
|
// Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
|
|
|
|
// *vec);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.Int64Ty->getPointerTo()};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_doacross_wait: {
|
|
|
|
// Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
|
|
|
|
// *vec);
|
|
|
|
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
|
|
|
CGM.Int64Ty->getPointerTo()};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
|
|
|
|
break;
|
|
|
|
}
|
2017-07-17 21:30:36 +08:00
|
|
|
case OMPRTL__kmpc_task_reduction_init: {
|
|
|
|
// Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
|
|
|
|
// *data);
|
|
|
|
llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn =
|
|
|
|
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__kmpc_task_reduction_get_th_data: {
|
|
|
|
// Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
|
|
|
|
// *d);
|
|
|
|
llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(
|
|
|
|
FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
|
|
|
|
break;
|
|
|
|
}
|
2015-10-03 00:14:20 +08:00
|
|
|
case OMPRTL__tgt_target: {
|
|
|
|
// Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
|
|
|
|
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
|
|
|
|
// *arg_types);
|
|
|
|
llvm::Type *TypeParams[] = {CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy,
|
|
|
|
CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.SizeTy->getPointerTo(),
|
|
|
|
CGM.Int32Ty->getPointerTo()};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
|
|
|
|
break;
|
|
|
|
}
|
2016-03-04 00:20:23 +08:00
|
|
|
case OMPRTL__tgt_target_teams: {
|
|
|
|
// Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
|
|
|
|
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
|
|
|
|
// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
|
|
|
|
llvm::Type *TypeParams[] = {CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrTy,
|
|
|
|
CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.SizeTy->getPointerTo(),
|
|
|
|
CGM.Int32Ty->getPointerTo(),
|
|
|
|
CGM.Int32Ty,
|
|
|
|
CGM.Int32Ty};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
|
|
|
|
break;
|
|
|
|
}
|
2016-01-06 21:42:12 +08:00
|
|
|
case OMPRTL__tgt_register_lib: {
|
|
|
|
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
|
|
|
|
QualType ParamTy =
|
|
|
|
CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
|
|
|
|
llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__tgt_unregister_lib: {
|
|
|
|
// Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
|
|
|
|
QualType ParamTy =
|
|
|
|
CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
|
|
|
|
llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
|
|
|
|
break;
|
|
|
|
}
|
2016-04-28 06:58:19 +08:00
|
|
|
case OMPRTL__tgt_target_data_begin: {
|
|
|
|
// Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
|
|
|
|
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
|
|
|
|
llvm::Type *TypeParams[] = {CGM.Int32Ty,
|
|
|
|
CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.SizeTy->getPointerTo(),
|
|
|
|
CGM.Int32Ty->getPointerTo()};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPRTL__tgt_target_data_end: {
|
|
|
|
// Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
|
|
|
|
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
|
|
|
|
llvm::Type *TypeParams[] = {CGM.Int32Ty,
|
|
|
|
CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.SizeTy->getPointerTo(),
|
|
|
|
CGM.Int32Ty->getPointerTo()};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
|
|
|
|
break;
|
|
|
|
}
|
2016-05-27 02:30:22 +08:00
|
|
|
case OMPRTL__tgt_target_data_update: {
|
|
|
|
// Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
|
|
|
|
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
|
|
|
|
llvm::Type *TypeParams[] = {CGM.Int32Ty,
|
|
|
|
CGM.Int32Ty,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.VoidPtrPtrTy,
|
|
|
|
CGM.SizeTy->getPointerTo(),
|
|
|
|
CGM.Int32Ty->getPointerTo()};
|
|
|
|
llvm::FunctionType *FnTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
|
|
|
|
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
|
|
|
|
break;
|
|
|
|
}
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
2016-02-19 18:38:26 +08:00
|
|
|
assert(RTLFn && "Unable to find OpenMP runtime function");
|
2014-05-06 18:08:46 +08:00
|
|
|
return RTLFn;
|
|
|
|
}
|
2014-09-22 18:01:53 +08:00
|
|
|
|
2015-03-13 18:38:23 +08:00
|
|
|
/// Declare the __kmpc_for_static_init_{4,4u,8,8u} runtime entry that matches
/// the bit width (\p IVSize, 32 or 64) and signedness (\p IVSigned) of the
/// loop iteration variable, and return the resulting runtime function.
llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
                                                             bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  // The suffix encodes the IV size in bytes; a trailing 'u' selects the
  // unsigned flavour.
  const char *EntryName;
  if (Is32Bit)
    EntryName = IVSigned ? "__kmpc_for_static_init_4"
                         : "__kmpc_for_static_init_4u";
  else
    EntryName = IVSigned ? "__kmpc_for_static_init_8"
                         : "__kmpc_for_static_init_8u";

  // Integer type of the iteration variable and a pointer to it.
  auto *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;
  auto *IVPtrTy = llvm::PointerType::getUnqual(IVTy);

  llvm::Type *ArgTys[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      IVPtrTy,                                   // p_lower
      IVPtrTy,                                   // p_upper
      IVPtrTy,                                   // p_stride
      IVTy,                                      // incr
      IVTy                                       // chunk
  };
  // The entry returns void and is not variadic.
  llvm::FunctionType *EntryTy =
      llvm::FunctionType::get(CGM.VoidTy, ArgTys, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(EntryTy, EntryName);
}
|
|
|
|
|
2015-03-12 21:37:50 +08:00
|
|
|
/// Declare the __kmpc_dispatch_init_{4,4u,8,8u} runtime entry that matches
/// the bit width (\p IVSize, 32 or 64) and signedness (\p IVSigned) of the
/// loop iteration variable, and return the resulting runtime function.
llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  // The suffix encodes the IV size in bytes; a trailing 'u' selects the
  // unsigned flavour.
  const char *EntryName;
  if (Is32Bit)
    EntryName = IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u";
  else
    EntryName = IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u";

  // Integer type of the iteration variable; the bounds are passed by value.
  auto *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;

  llvm::Type *ArgTys[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
      CGM.Int32Ty,           // schedtype
      IVTy,                  // lower
      IVTy,                  // upper
      IVTy,                  // stride
      IVTy                   // chunk
  };
  // The entry returns void and is not variadic.
  llvm::FunctionType *EntryTy =
      llvm::FunctionType::get(CGM.VoidTy, ArgTys, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(EntryTy, EntryName);
}
|
|
|
|
|
2015-04-22 19:15:40 +08:00
|
|
|
/// Declare the __kmpc_dispatch_fini_{4,4u,8,8u} runtime entry that matches
/// the bit width (\p IVSize, 32 or 64) and signedness (\p IVSigned) of the
/// loop iteration variable, and return the resulting runtime function.
llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");

  // Only the entry name depends on the IV type; the signature is the same for
  // every variant.
  const char *EntryName;
  if (IVSize == 32)
    EntryName = IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u";
  else
    EntryName = IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u";

  llvm::Type *ArgTys[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty            // tid
  };
  // The entry returns void and is not variadic.
  llvm::FunctionType *EntryTy =
      llvm::FunctionType::get(CGM.VoidTy, ArgTys, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(EntryTy, EntryName);
}
|
|
|
|
|
2015-03-12 21:37:50 +08:00
|
|
|
/// Declare the __kmpc_dispatch_next_{4,4u,8,8u} runtime entry that matches
/// the bit width (\p IVSize, 32 or 64) and signedness (\p IVSigned) of the
/// loop iteration variable, and return the resulting runtime function.
llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  // The suffix encodes the IV size in bytes; a trailing 'u' selects the
  // unsigned flavour.
  const char *EntryName;
  if (Is32Bit)
    EntryName = IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u";
  else
    EntryName = IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u";

  // The bounds and stride are passed as pointers to the IV integer type.
  auto *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;
  auto *IVPtrTy = llvm::PointerType::getUnqual(IVTy);

  llvm::Type *ArgTys[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      IVPtrTy,                                   // p_lower
      IVPtrTy,                                   // p_upper
      IVPtrTy                                    // p_stride
  };
  // Unlike the init/fini entries, this one returns a 32-bit integer.
  llvm::FunctionType *EntryTy =
      llvm::FunctionType::get(CGM.Int32Ty, ArgTys, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(EntryTy, EntryName);
}
|
|
|
|
|
2014-11-11 12:05:39 +08:00
|
|
|
/// Return the internal variable named "<mangled name of VD>.cache.", obtained
/// through getOrCreateInternalVariable with type CGM.Int8PtrPtrTy.
///
/// Asserts that the OpenMPUseTLS language option and target TLS support are
/// not both enabled.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!(CGM.getLangOpts().OpenMPUseTLS &&
           CGM.getContext().getTargetInfo().isTLSSupported()));
  llvm::StringRef MangledName = CGM.getMangledName(VD);
  // Look the entry up, creating it lazily if needed. The Twine is built
  // directly in the call expression so it never outlives its operands.
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(MangledName) + ".cache.");
}
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
|
|
|
|
const VarDecl *VD,
|
|
|
|
Address VDAddr,
|
|
|
|
SourceLocation Loc) {
|
2015-07-14 06:54:53 +08:00
|
|
|
if (CGM.getLangOpts().OpenMPUseTLS &&
|
|
|
|
CGM.getContext().getTargetInfo().isTLSSupported())
|
|
|
|
return VDAddr;
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto VarTy = VDAddr.getElementType();
|
2015-02-25 16:32:46 +08:00
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
|
|
|
|
CGM.Int8PtrTy),
|
2014-11-11 12:05:39 +08:00
|
|
|
CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
|
|
|
|
getOrCreateThreadPrivateCache(VD)};
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
return Address(CGF.EmitRuntimeCall(
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
|
|
|
|
VDAddr.getAlignment());
|
2014-11-11 12:05:39 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
void CGOpenMPRuntime::emitThreadPrivateVarInit(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
|
2014-11-11 12:05:39 +08:00
|
|
|
llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
|
|
|
|
// Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
|
|
|
|
// library.
|
2015-02-25 16:32:46 +08:00
|
|
|
auto OMPLoc = emitUpdateLocation(CGF, Loc);
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
|
2014-11-11 12:05:39 +08:00
|
|
|
OMPLoc);
|
|
|
|
// Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
|
|
|
|
// to register constructor/destructor for variable.
|
|
|
|
llvm::Value *Args[] = {OMPLoc,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
|
|
|
|
CGM.VoidPtrTy),
|
2014-11-11 12:05:39 +08:00
|
|
|
Ctor, CopyCtor, Dtor};
|
2014-12-03 20:11:24 +08:00
|
|
|
CGF.EmitRuntimeCall(
|
2015-02-25 16:32:46 +08:00
|
|
|
createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
|
2014-11-11 12:05:39 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
const VarDecl *VD, Address VDAddr, SourceLocation Loc,
|
2014-11-11 12:05:39 +08:00
|
|
|
bool PerformInit, CodeGenFunction *CGF) {
|
2015-07-14 06:54:53 +08:00
|
|
|
if (CGM.getLangOpts().OpenMPUseTLS &&
|
|
|
|
CGM.getContext().getTargetInfo().isTLSSupported())
|
|
|
|
return nullptr;
|
|
|
|
|
2014-11-11 12:05:39 +08:00
|
|
|
VD = VD->getDefinition(CGM.getContext());
|
|
|
|
if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
|
|
|
|
ThreadPrivateWithDefinition.insert(VD);
|
|
|
|
QualType ASTTy = VD->getType();
|
|
|
|
|
|
|
|
llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
|
|
|
|
auto Init = VD->getAnyInitializer();
|
|
|
|
if (CGM.getLangOpts().CPlusPlus && PerformInit) {
|
|
|
|
// Generate function that re-emits the declaration's initializer into the
|
|
|
|
// threadprivate copy of the variable VD
|
|
|
|
CodeGenFunction CtorCGF(CGM);
|
|
|
|
FunctionArgList Args;
|
2017-06-09 21:40:18 +08:00
|
|
|
ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
|
|
|
|
ImplicitParamDecl::Other);
|
2014-11-11 12:05:39 +08:00
|
|
|
Args.push_back(&Dst);
|
|
|
|
|
2016-03-11 12:30:31 +08:00
|
|
|
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
|
|
|
|
CGM.getContext().VoidPtrTy, Args);
|
2014-11-11 12:05:39 +08:00
|
|
|
auto FTy = CGM.getTypes().GetFunctionType(FI);
|
|
|
|
auto Fn = CGM.CreateGlobalInitOrDestructFunction(
|
2015-10-31 09:28:07 +08:00
|
|
|
FTy, ".__kmpc_global_ctor_.", FI, Loc);
|
2014-11-11 12:05:39 +08:00
|
|
|
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
|
|
|
|
Args, SourceLocation());
|
|
|
|
auto ArgVal = CtorCGF.EmitLoadOfScalar(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
|
2014-11-11 12:05:39 +08:00
|
|
|
CGM.getContext().VoidPtrTy, Dst.getLocation());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address Arg = Address(ArgVal, VDAddr.getAlignment());
|
|
|
|
Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
|
|
|
|
CtorCGF.ConvertTypeForMem(ASTTy));
|
2014-11-11 12:05:39 +08:00
|
|
|
CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
|
|
|
|
/*IsInitializer=*/true);
|
|
|
|
ArgVal = CtorCGF.EmitLoadOfScalar(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
|
2014-11-11 12:05:39 +08:00
|
|
|
CGM.getContext().VoidPtrTy, Dst.getLocation());
|
|
|
|
CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
|
|
|
|
CtorCGF.FinishFunction();
|
|
|
|
Ctor = Fn;
|
|
|
|
}
|
|
|
|
if (VD->getType().isDestructedType() != QualType::DK_none) {
|
|
|
|
// Generate function that emits destructor call for the threadprivate copy
|
|
|
|
// of the variable VD
|
|
|
|
CodeGenFunction DtorCGF(CGM);
|
|
|
|
FunctionArgList Args;
|
2017-06-09 21:40:18 +08:00
|
|
|
ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
|
|
|
|
ImplicitParamDecl::Other);
|
2014-11-11 12:05:39 +08:00
|
|
|
Args.push_back(&Dst);
|
|
|
|
|
2016-03-11 12:30:31 +08:00
|
|
|
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
|
|
|
|
CGM.getContext().VoidTy, Args);
|
2014-11-11 12:05:39 +08:00
|
|
|
auto FTy = CGM.getTypes().GetFunctionType(FI);
|
|
|
|
auto Fn = CGM.CreateGlobalInitOrDestructFunction(
|
2015-10-31 09:28:07 +08:00
|
|
|
FTy, ".__kmpc_global_dtor_.", FI, Loc);
|
2016-04-25 06:22:29 +08:00
|
|
|
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
|
2014-11-11 12:05:39 +08:00
|
|
|
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
|
|
|
|
SourceLocation());
|
2016-04-25 06:22:29 +08:00
|
|
|
// Create a scope with an artificial location for the body of this function.
|
|
|
|
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
|
2014-11-11 12:05:39 +08:00
|
|
|
auto ArgVal = DtorCGF.EmitLoadOfScalar(
|
|
|
|
DtorCGF.GetAddrOfLocalVar(&Dst),
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
/*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
|
|
|
|
DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
|
2014-11-11 12:05:39 +08:00
|
|
|
DtorCGF.getDestroyer(ASTTy.isDestructedType()),
|
|
|
|
DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
|
|
|
|
DtorCGF.FinishFunction();
|
|
|
|
Dtor = Fn;
|
|
|
|
}
|
|
|
|
// Do not emit init function if it is not required.
|
|
|
|
if (!Ctor && !Dtor)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
|
|
|
|
auto CopyCtorTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
|
|
|
|
/*isVarArg=*/false)->getPointerTo();
|
|
|
|
// Copying constructor for the threadprivate variable.
|
|
|
|
// Must be NULL - reserved by runtime, but currently it requires that this
|
|
|
|
// parameter is always NULL. Otherwise it fires assertion.
|
|
|
|
CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
|
|
|
|
if (Ctor == nullptr) {
|
|
|
|
auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
|
|
|
|
/*isVarArg=*/false)->getPointerTo();
|
|
|
|
Ctor = llvm::Constant::getNullValue(CtorTy);
|
|
|
|
}
|
|
|
|
if (Dtor == nullptr) {
|
|
|
|
auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
|
|
|
|
/*isVarArg=*/false)->getPointerTo();
|
|
|
|
Dtor = llvm::Constant::getNullValue(DtorTy);
|
|
|
|
}
|
|
|
|
if (!CGF) {
|
|
|
|
auto InitFunctionTy =
|
|
|
|
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
|
|
|
|
auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
|
2015-10-31 09:28:07 +08:00
|
|
|
InitFunctionTy, ".__omp_threadprivate_init_.",
|
|
|
|
CGM.getTypes().arrangeNullaryFunction());
|
2014-11-11 12:05:39 +08:00
|
|
|
CodeGenFunction InitCGF(CGM);
|
|
|
|
FunctionArgList ArgList;
|
|
|
|
InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
|
|
|
|
CGM.getTypes().arrangeNullaryFunction(), ArgList,
|
|
|
|
Loc);
|
2015-02-25 16:32:46 +08:00
|
|
|
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
|
2014-11-11 12:05:39 +08:00
|
|
|
InitCGF.FinishFunction();
|
|
|
|
return InitFunction;
|
|
|
|
}
|
2015-02-25 16:32:46 +08:00
|
|
|
emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
|
2014-11-11 12:05:39 +08:00
|
|
|
}
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2017-07-17 21:30:36 +08:00
|
|
|
/// Emits a call to __kmpc_threadprivate_cached for an internal variable named
/// "<Name>.artificial." and returns the call result as an Address.
///
/// \param CGF     Function into which the runtime call is emitted.
/// \param VarType Type of the variable; its memory type is used for the
///                internal variable and for the type of the returned pointer.
/// \param Name    Base name; ".artificial." is appended for the variable and
///                ".artificial..cache." for the accompanying cache variable.
/// \returns The runtime call result cast to a pointer to \a VarType's memory
///          type (address space 0), paired with the target pointer alignment.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  llvm::Type *MemTy = CGF.ConvertTypeForMem(VarType);

  // Internal variable that is handed to the runtime as the data argument.
  llvm::Value *InternalVar =
      getOrCreateInternalVariable(MemTy, Name + ".artificial.");

  // Materialize the call operands one by one, in argument order, so the
  // emitted instructions appear in the same sequence as the argument list.
  llvm::Value *LocArg = emitUpdateLocation(CGF, SourceLocation());
  llvm::Value *ThreadIDArg = getThreadID(CGF, SourceLocation());
  llvm::Value *DataArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      InternalVar, CGM.VoidPtrTy);
  llvm::Value *SizeArg = CGF.Builder.CreateIntCast(
      CGF.getTypeSize(VarType), CGM.SizeTy, /*IsSigned=*/false);
  llvm::Value *CacheArg = getOrCreateInternalVariable(
      CGM.VoidPtrPtrTy, Name + ".artificial." + ".cache.");
  llvm::Value *CallArgs[] = {LocArg, ThreadIDArg, DataArg, SizeArg, CacheArg};

  llvm::Value *RawResult = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), CallArgs);
  llvm::Value *TypedResult = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      RawResult, MemTy->getPointerTo(/*AddrSpace=*/0));

  // NOTE(review): the alignment recorded here is the generic pointer
  // alignment, not an alignment derived from VarType -- confirm this is
  // intended for all variable types.
  return Address(TypedResult, CGM.getPointerAlign());
}
|
|
|
|
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
|
|
|
|
/// function. Here is the logic:
|
|
|
|
/// if (Cond) {
|
|
|
|
/// ThenGen();
|
|
|
|
/// } else {
|
|
|
|
/// ElseGen();
|
|
|
|
/// }
|
2017-01-10 23:42:51 +08:00
|
|
|
void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
|
|
|
|
const RegionCodeGenTy &ThenGen,
|
|
|
|
const RegionCodeGenTy &ElseGen) {
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
|
|
|
|
|
|
|
|
// If the condition constant folds and can be elided, try to avoid emitting
|
|
|
|
// the condition and the dead arm of the if/else.
|
|
|
|
bool CondConstant;
|
|
|
|
if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
|
2016-03-29 13:34:15 +08:00
|
|
|
if (CondConstant)
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
ThenGen(CGF);
|
2016-03-29 13:34:15 +08:00
|
|
|
else
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
ElseGen(CGF);
|
|
|
|
return;
|
|
|
|
}
|
2014-10-08 22:01:46 +08:00
|
|
|
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
// Otherwise, the condition did not fold, or we couldn't elide it. Just
|
|
|
|
// emit the conditional branch.
|
|
|
|
auto ThenBlock = CGF.createBasicBlock("omp_if.then");
|
|
|
|
auto ElseBlock = CGF.createBasicBlock("omp_if.else");
|
|
|
|
auto ContBlock = CGF.createBasicBlock("omp_if.end");
|
|
|
|
CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
// Emit the 'then' code.
|
|
|
|
CGF.EmitBlock(ThenBlock);
|
2016-03-29 13:34:15 +08:00
|
|
|
ThenGen(CGF);
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
CGF.EmitBranch(ContBlock);
|
|
|
|
// Emit the 'else' code if present.
|
2016-03-29 13:34:15 +08:00
|
|
|
// There is no need to emit line number for unconditional branch.
|
|
|
|
(void)ApplyDebugLocation::CreateEmpty(CGF);
|
|
|
|
CGF.EmitBlock(ElseBlock);
|
|
|
|
ElseGen(CGF);
|
|
|
|
// There is no need to emit line number for unconditional branch.
|
|
|
|
(void)ApplyDebugLocation::CreateEmpty(CGF);
|
|
|
|
CGF.EmitBranch(ContBlock);
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
// Emit the continuation block for code after the if.
|
|
|
|
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
|
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
|
llvm::Value *OutlinedFn,
|
2015-09-10 16:12:02 +08:00
|
|
|
ArrayRef<llvm::Value *> CapturedVars,
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
const Expr *IfCond) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
auto *RTLoc = emitUpdateLocation(CGF, Loc);
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
|
|
|
|
PrePostActionTy &) {
|
2015-09-10 16:12:02 +08:00
|
|
|
// Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &RT = CGF.CGM.getOpenMPRuntime();
|
2015-09-10 16:12:02 +08:00
|
|
|
llvm::Value *Args[] = {
|
|
|
|
RTLoc,
|
|
|
|
CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
|
2016-03-29 13:34:15 +08:00
|
|
|
CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
|
2015-09-10 16:12:02 +08:00
|
|
|
llvm::SmallVector<llvm::Value *, 16> RealArgs;
|
|
|
|
RealArgs.append(std::begin(Args), std::end(Args));
|
|
|
|
RealArgs.append(CapturedVars.begin(), CapturedVars.end());
|
|
|
|
|
2016-03-29 13:34:15 +08:00
|
|
|
auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
|
2015-09-10 16:12:02 +08:00
|
|
|
CGF.EmitRuntimeCall(RTLFn, RealArgs);
|
|
|
|
};
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
|
|
|
|
PrePostActionTy &) {
|
|
|
|
auto &RT = CGF.CGM.getOpenMPRuntime();
|
|
|
|
auto ThreadID = RT.getThreadID(CGF, Loc);
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
// Build calls:
|
|
|
|
// __kmpc_serialized_parallel(&Loc, GTid);
|
|
|
|
llvm::Value *Args[] = {RTLoc, ThreadID};
|
2016-03-29 13:34:15 +08:00
|
|
|
CGF.EmitRuntimeCall(
|
|
|
|
RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
|
|
|
|
// OutlinedFn(&GTid, &zero, CapturedStruct);
|
2016-03-29 13:34:15 +08:00
|
|
|
auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address ZeroAddr =
|
2016-03-29 13:34:15 +08:00
|
|
|
CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
|
|
|
|
/*Name*/ ".zero.addr");
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
2015-09-10 16:12:02 +08:00
|
|
|
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
|
|
|
|
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
|
|
|
|
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
|
|
|
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
|
|
|
|
|
|
|
|
// __kmpc_end_serialized_parallel(&Loc, GTid);
|
2016-03-29 13:34:15 +08:00
|
|
|
llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
CGF.EmitRuntimeCall(
|
2016-03-29 13:34:15 +08:00
|
|
|
RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
|
|
|
|
EndArgs);
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
};
|
2016-03-29 13:34:15 +08:00
|
|
|
if (IfCond)
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
|
2016-03-29 13:34:15 +08:00
|
|
|
else {
|
|
|
|
RegionCodeGenTy ThenRCG(ThenGen);
|
|
|
|
ThenRCG(CGF);
|
2016-03-28 20:58:34 +08:00
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
}
|
|
|
|
|
2014-10-27 16:08:18 +08:00
|
|
|
// If we're inside an (outlined) parallel region, use the region info's
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined function for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
// thread-ID variable (it is passed in a first argument of the outlined function
|
|
|
|
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
|
|
|
|
// regular serial code region, get thread ID by calling kmp_int32
|
|
|
|
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
|
|
|
|
// return the address of that temp.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
|
|
|
|
SourceLocation Loc) {
|
2016-01-22 16:56:50 +08:00
|
|
|
if (auto *OMPRegionInfo =
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
|
2015-02-26 18:27:34 +08:00
|
|
|
if (OMPRegionInfo->getThreadIDVariable())
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
|
2015-02-26 18:27:34 +08:00
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
auto ThreadID = getThreadID(CGF, Loc);
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
auto Int32Ty =
|
|
|
|
CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
|
|
|
|
auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
|
|
|
|
CGF.EmitStoreOfScalar(ThreadID,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
|
|
|
|
return ThreadIDTemp;
|
|
|
|
}
|
|
|
|
|
2014-11-11 12:05:39 +08:00
|
|
|
/// Look up the runtime-internal global called \p Name, creating it on first
/// use.
///
/// A newly created variable is a non-constant, zero-initialized global of
/// type \p Ty with common linkage, added to the current module and cached in
/// InternalVars under its name. On a cache hit, the cached variable is
/// returned; an assertion checks that its pointee type matches \p Ty.
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
                                             const llvm::Twine &Name) {
  // Render the twine into local storage so it can be used as a map key.
  SmallString<256> NameStorage;
  llvm::raw_svector_ostream NameOS(NameStorage);
  NameOS << Name;
  auto Key = NameOS.str();

  // insert() leaves an existing entry untouched, so a null mapped value means
  // the variable has not been created yet.
  auto &Entry = *InternalVars.insert(std::make_pair(Key, nullptr)).first;
  if (!Entry.second)
    return Entry.second = new llvm::GlobalVariable(
               CGM.getModule(), Ty, /*IsConstant*/ false,
               llvm::GlobalValue::CommonLinkage,
               llvm::Constant::getNullValue(Ty), Entry.first());

  assert(Entry.second->getType()->getPointerElementType() == Ty &&
         "OMP internal variable has different type than requested");
  return &*Entry.second;
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// Return the internal lock variable for the critical region \p CriticalName.
///
/// The variable is named ".gomp_critical_user_<CriticalName>.var", has type
/// KmpCriticalNameTy, and is obtained through getOrCreateInternalVariable().
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  // The whole name is built inside one full expression so that every Twine
  // temporary stays alive until the callee has consumed it.
  return getOrCreateInternalVariable(
      KmpCriticalNameTy,
      llvm::Twine(".gomp_critical_user_", CriticalName).concat(".var"));
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
namespace {
|
2016-03-29 13:34:15 +08:00
|
|
|
/// Common pre(post)-action for different OpenMP constructs.
|
|
|
|
class CommonActionTy final : public PrePostActionTy {
|
|
|
|
llvm::Value *EnterCallee;
|
|
|
|
ArrayRef<llvm::Value *> EnterArgs;
|
|
|
|
llvm::Value *ExitCallee;
|
|
|
|
ArrayRef<llvm::Value *> ExitArgs;
|
|
|
|
bool Conditional;
|
|
|
|
llvm::BasicBlock *ContBlock = nullptr;
|
2015-04-10 12:50:10 +08:00
|
|
|
|
|
|
|
public:
|
2016-03-29 13:34:15 +08:00
|
|
|
CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
|
|
|
|
llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
|
|
|
|
bool Conditional = false)
|
|
|
|
: EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
|
|
|
|
ExitArgs(ExitArgs), Conditional(Conditional) {}
|
|
|
|
void Enter(CodeGenFunction &CGF) override {
|
|
|
|
llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
|
|
|
|
if (Conditional) {
|
|
|
|
llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
|
|
|
|
auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
|
|
|
|
ContBlock = CGF.createBasicBlock("omp_if.end");
|
|
|
|
// Generate the branch (If-stmt)
|
|
|
|
CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
|
|
|
|
CGF.EmitBlock(ThenBlock);
|
|
|
|
}
|
2016-03-28 20:52:58 +08:00
|
|
|
}
|
2016-03-29 13:34:15 +08:00
|
|
|
void Done(CodeGenFunction &CGF) {
|
|
|
|
// Emit the rest of blocks/branches
|
|
|
|
CGF.EmitBranch(ContBlock);
|
|
|
|
CGF.EmitBlock(ContBlock, true);
|
|
|
|
}
|
|
|
|
void Exit(CodeGenFunction &CGF) override {
|
|
|
|
CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
|
2015-04-10 15:48:12 +08:00
|
|
|
}
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
2015-09-11 01:07:54 +08:00
|
|
|
} // anonymous namespace
|
2015-04-10 12:50:10 +08:00
|
|
|
|
|
|
|
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
|
|
|
|
StringRef CriticalName,
|
|
|
|
const RegionCodeGenTy &CriticalOpGen,
|
2015-12-15 18:55:09 +08:00
|
|
|
SourceLocation Loc, const Expr *Hint) {
|
|
|
|
// __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
|
2014-12-01 19:32:38 +08:00
|
|
|
// CriticalOpGen();
|
|
|
|
// __kmpc_end_critical(ident_t *, gtid, Lock);
|
|
|
|
// Prepare arguments and build a call to __kmpc_critical
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-12-15 18:55:09 +08:00
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
|
|
|
|
getCriticalRegionLock(CriticalName)};
|
2016-03-29 13:34:15 +08:00
|
|
|
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
|
|
|
|
std::end(Args));
|
2015-12-15 18:55:09 +08:00
|
|
|
if (Hint) {
|
2016-03-29 13:34:15 +08:00
|
|
|
EnterArgs.push_back(CGF.Builder.CreateIntCast(
|
|
|
|
CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
|
|
|
|
}
|
|
|
|
CommonActionTy Action(
|
|
|
|
createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
|
|
|
|
: OMPRTL__kmpc_critical),
|
|
|
|
EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
|
|
|
|
CriticalOpGen.setAction(Action);
|
2015-12-15 18:55:09 +08:00
|
|
|
emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
|
2014-09-22 18:01:53 +08:00
|
|
|
}
|
2014-10-08 22:01:46 +08:00
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
|
2015-04-10 12:50:10 +08:00
|
|
|
const RegionCodeGenTy &MasterOpGen,
|
2015-02-25 16:32:46 +08:00
|
|
|
SourceLocation Loc) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2014-12-04 15:23:53 +08:00
|
|
|
// if(__kmpc_master(ident_t *, gtid)) {
|
|
|
|
// MasterOpGen();
|
|
|
|
// __kmpc_end_master(ident_t *, gtid);
|
|
|
|
// }
|
|
|
|
// Prepare arguments and build a call to __kmpc_master
|
2015-04-10 14:33:45 +08:00
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
|
2016-03-29 13:34:15 +08:00
|
|
|
CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
|
|
|
|
/*Conditional=*/true);
|
|
|
|
MasterOpGen.setAction(Action);
|
|
|
|
emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
|
|
|
|
Action.Done(CGF);
|
2014-12-04 15:23:53 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// Emit a call to __kmpc_omp_taskyield(loc, thread_id, 0) and, when code is
/// being generated inside an OpenMP region, let that region emit its untied
/// switch afterwards.
///
/// \param Loc Source location used for the runtime call.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  // Arguments: location, global thread id, and a constant 0 of type IntTy.
  llvm::Value *YieldArgs[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
                      YieldArgs);

  auto *RegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (RegionInfo)
    RegionInfo->emitUntiedSwitch(CGF);
}
|
|
|
|
|
2015-06-18 20:14:09 +08:00
|
|
|
/// Emit an OpenMP 'taskgroup' region.
///
/// Generated code has the shape
///   __kmpc_taskgroup(ident_t *, gtid);
///   TaskgroupOpGen();
///   __kmpc_end_taskgroup(ident_t *, gtid);
///
/// \param TaskgroupOpGen Generator for the body of the region.
/// \param Loc            Source location used for the runtime calls.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  // Both runtime calls receive the same (location, thread id) pair.
  llvm::Value *LocAndGTid[] = {emitUpdateLocation(CGF, Loc),
                               getThreadID(CGF, Loc)};
  CommonActionTy TaskgroupAction(
      createRuntimeFunction(OMPRTL__kmpc_taskgroup), LocAndGTid,
      createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), LocAndGTid);
  TaskgroupOpGen.setAction(TaskgroupAction);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
/// Given an array of pointers to variables, project the address of a
|
|
|
|
/// given variable.
|
2015-10-08 17:10:53 +08:00
|
|
|
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
|
|
|
|
unsigned Index, const VarDecl *Var) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
// Pull out the pointer to the variable.
|
|
|
|
Address PtrAddr =
|
2015-10-08 17:10:53 +08:00
|
|
|
CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
|
|
|
|
|
|
|
|
Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
|
2015-10-08 17:10:53 +08:00
|
|
|
Addr = CGF.Builder.CreateElementBitCast(
|
|
|
|
Addr, CGF.ConvertTypeForMem(Var->getType()));
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
return Addr;
|
|
|
|
}
|
|
|
|
|
2015-03-23 14:18:07 +08:00
|
|
|
static llvm::Value *emitCopyprivateCopyFunction(
|
2015-04-14 13:11:24 +08:00
|
|
|
CodeGenModule &CGM, llvm::Type *ArgsType,
|
|
|
|
ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
|
|
|
|
ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
|
2015-03-23 14:18:07 +08:00
|
|
|
auto &C = CGM.getContext();
|
|
|
|
// void copy_func(void *LHSArg, void *RHSArg);
|
|
|
|
FunctionArgList Args;
|
2017-06-09 21:40:18 +08:00
|
|
|
ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
|
|
|
|
ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
|
2015-03-23 14:18:07 +08:00
|
|
|
Args.push_back(&LHSArg);
|
|
|
|
Args.push_back(&RHSArg);
|
2016-03-11 12:30:31 +08:00
|
|
|
auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
|
2015-03-23 14:18:07 +08:00
|
|
|
auto *Fn = llvm::Function::Create(
|
|
|
|
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
|
|
|
|
".omp.copyprivate.copy_func", &CGM.getModule());
|
2015-10-28 10:30:47 +08:00
|
|
|
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
|
2015-03-23 14:18:07 +08:00
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
|
2015-04-14 13:11:24 +08:00
|
|
|
// Dest = (void*[n])(LHSArg);
|
2015-03-23 14:18:07 +08:00
|
|
|
// Src = (void*[n])(RHSArg);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
|
|
|
|
ArgsType), CGF.getPointerAlign());
|
|
|
|
Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
|
|
|
|
ArgsType), CGF.getPointerAlign());
|
2015-03-23 14:18:07 +08:00
|
|
|
// *(Type0*)Dst[0] = *(Type0*)Src[0];
|
|
|
|
// *(Type1*)Dst[1] = *(Type1*)Src[1];
|
|
|
|
// ...
|
|
|
|
// *(Typen*)Dst[n] = *(Typen*)Src[n];
|
|
|
|
for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
|
|
|
|
Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
|
|
|
|
|
|
|
|
auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
|
|
|
|
Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
|
|
|
|
|
2015-05-19 20:31:28 +08:00
|
|
|
auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
|
|
|
|
QualType Type = VD->getType();
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
|
2015-03-23 14:18:07 +08:00
|
|
|
}
|
|
|
|
CGF.FinishFunction();
|
|
|
|
return Fn;
|
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
|
2015-04-10 12:50:10 +08:00
|
|
|
const RegionCodeGenTy &SingleOpGen,
|
2015-03-23 14:18:07 +08:00
|
|
|
SourceLocation Loc,
|
|
|
|
ArrayRef<const Expr *> CopyprivateVars,
|
|
|
|
ArrayRef<const Expr *> SrcExprs,
|
|
|
|
ArrayRef<const Expr *> DstExprs,
|
|
|
|
ArrayRef<const Expr *> AssignmentOps) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-03-23 14:18:07 +08:00
|
|
|
assert(CopyprivateVars.size() == SrcExprs.size() &&
|
|
|
|
CopyprivateVars.size() == DstExprs.size() &&
|
|
|
|
CopyprivateVars.size() == AssignmentOps.size());
|
|
|
|
auto &C = CGM.getContext();
|
|
|
|
// int32 did_it = 0;
|
2015-02-05 14:35:41 +08:00
|
|
|
// if(__kmpc_single(ident_t *, gtid)) {
|
|
|
|
// SingleOpGen();
|
|
|
|
// __kmpc_end_single(ident_t *, gtid);
|
2015-03-23 14:18:07 +08:00
|
|
|
// did_it = 1;
|
2015-02-05 14:35:41 +08:00
|
|
|
// }
|
2015-03-23 14:18:07 +08:00
|
|
|
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
|
|
|
|
// <copy_func>, did_it);
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address DidIt = Address::invalid();
|
2015-03-23 14:18:07 +08:00
|
|
|
if (!CopyprivateVars.empty()) {
|
|
|
|
// int32 did_it = 0;
|
|
|
|
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
|
|
|
|
DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
|
2015-03-23 14:18:07 +08:00
|
|
|
}
|
2015-02-05 14:35:41 +08:00
|
|
|
// Prepare arguments and build a call to __kmpc_single
|
2015-04-10 14:33:45 +08:00
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
|
2016-03-29 13:34:15 +08:00
|
|
|
CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
|
|
|
|
/*Conditional=*/true);
|
|
|
|
SingleOpGen.setAction(Action);
|
|
|
|
emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
|
|
|
|
if (DidIt.isValid()) {
|
|
|
|
// did_it = 1;
|
|
|
|
CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
|
|
|
|
}
|
|
|
|
Action.Done(CGF);
|
2015-03-23 14:18:07 +08:00
|
|
|
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
|
|
|
|
// <copy_func>, did_it);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
if (DidIt.isValid()) {
|
2015-03-23 14:18:07 +08:00
|
|
|
llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
|
|
|
|
auto CopyprivateArrayTy =
|
|
|
|
C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
|
|
|
|
/*IndexTypeQuals=*/0);
|
|
|
|
// Create a list of all private variables for copyprivate.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address CopyprivateList =
|
2015-03-23 14:18:07 +08:00
|
|
|
CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
|
|
|
|
for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address Elem = CGF.Builder.CreateConstArrayGEP(
|
|
|
|
CopyprivateList, I, CGF.getPointerSize());
|
|
|
|
CGF.Builder.CreateStore(
|
2015-03-23 14:18:07 +08:00
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
|
|
|
|
Elem);
|
2015-03-23 14:18:07 +08:00
|
|
|
}
|
|
|
|
// Build function that copies private values from single region to all other
|
|
|
|
// threads in the corresponding parallel region.
|
|
|
|
auto *CpyFn = emitCopyprivateCopyFunction(
|
|
|
|
CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
|
2015-04-14 13:11:24 +08:00
|
|
|
CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
|
2016-01-26 20:20:39 +08:00
|
|
|
auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address CL =
|
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
|
|
|
|
CGF.VoidPtrTy);
|
|
|
|
auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
|
2015-03-23 14:18:07 +08:00
|
|
|
llvm::Value *Args[] = {
|
|
|
|
emitUpdateLocation(CGF, Loc), // ident_t *<loc>
|
|
|
|
getThreadID(CGF, Loc), // i32 <gtid>
|
2015-04-30 11:47:32 +08:00
|
|
|
BufSize, // size_t <buf_size>
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CL.getPointer(), // void *<copyprivate list>
|
2015-03-23 14:18:07 +08:00
|
|
|
CpyFn, // void (*) (void *, void *) <copy_func>
|
|
|
|
DidItVal // i32 did_it
|
|
|
|
};
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
|
|
|
|
}
|
2015-02-05 14:35:41 +08:00
|
|
|
}
|
|
|
|
|
2015-04-22 19:15:40 +08:00
|
|
|
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
|
|
|
|
const RegionCodeGenTy &OrderedOpGen,
|
2015-09-29 11:48:57 +08:00
|
|
|
SourceLocation Loc, bool IsThreads) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-04-22 19:15:40 +08:00
|
|
|
// __kmpc_ordered(ident_t *, gtid);
|
|
|
|
// OrderedOpGen();
|
|
|
|
// __kmpc_end_ordered(ident_t *, gtid);
|
|
|
|
// Prepare arguments and build a call to __kmpc_ordered
|
2015-09-29 11:48:57 +08:00
|
|
|
if (IsThreads) {
|
2015-04-22 19:15:40 +08:00
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
|
2016-03-29 13:34:15 +08:00
|
|
|
CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_end_ordered),
|
|
|
|
Args);
|
|
|
|
OrderedOpGen.setAction(Action);
|
|
|
|
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
|
|
|
|
return;
|
2015-04-22 19:15:40 +08:00
|
|
|
}
|
2015-09-29 11:48:57 +08:00
|
|
|
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
|
2015-04-22 19:15:40 +08:00
|
|
|
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
|
2015-09-15 20:52:43 +08:00
|
|
|
OpenMPDirectiveKind Kind, bool EmitChecks,
|
|
|
|
bool ForceSimpleCall) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2014-12-05 12:09:23 +08:00
|
|
|
// Build call __kmpc_cancel_barrier(loc, thread_id);
|
2015-07-03 17:56:58 +08:00
|
|
|
// Build call __kmpc_barrier(loc, thread_id);
|
2016-02-19 18:38:26 +08:00
|
|
|
unsigned Flags;
|
|
|
|
if (Kind == OMPD_for)
|
|
|
|
Flags = OMP_IDENT_BARRIER_IMPL_FOR;
|
|
|
|
else if (Kind == OMPD_sections)
|
|
|
|
Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
|
|
|
|
else if (Kind == OMPD_single)
|
|
|
|
Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
|
|
|
|
else if (Kind == OMPD_barrier)
|
|
|
|
Flags = OMP_IDENT_BARRIER_EXPL;
|
|
|
|
else
|
|
|
|
Flags = OMP_IDENT_BARRIER_IMPL;
|
2015-07-03 17:56:58 +08:00
|
|
|
// Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
|
|
|
|
// thread_id);
|
2015-02-25 16:32:46 +08:00
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
|
|
|
|
getThreadID(CGF, Loc)};
|
2016-01-22 16:56:50 +08:00
|
|
|
if (auto *OMPRegionInfo =
|
|
|
|
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
|
2015-09-15 20:52:43 +08:00
|
|
|
if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
|
2015-07-03 17:56:58 +08:00
|
|
|
auto *Result = CGF.EmitRuntimeCall(
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
|
2015-09-15 20:52:43 +08:00
|
|
|
if (EmitChecks) {
|
2015-07-03 17:56:58 +08:00
|
|
|
// if (__kmpc_cancel_barrier()) {
|
|
|
|
// exit from construct;
|
|
|
|
// }
|
|
|
|
auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
|
|
|
|
auto *ContBB = CGF.createBasicBlock(".cancel.continue");
|
|
|
|
auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
|
|
|
|
CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
|
|
|
|
CGF.EmitBlock(ExitBB);
|
|
|
|
// exit from construct;
|
2015-09-15 20:52:43 +08:00
|
|
|
auto CancelDestination =
|
|
|
|
CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
|
2015-07-03 17:56:58 +08:00
|
|
|
CGF.EmitBranchThroughCleanup(CancelDestination);
|
|
|
|
CGF.EmitBlock(ContBB, /*IsFinished=*/true);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
|
2014-10-08 22:01:46 +08:00
|
|
|
}
|
|
|
|
|
2014-12-15 15:07:06 +08:00
|
|
|
/// \brief Map the OpenMP loop schedule to the runtime enumeration.
|
|
|
|
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
|
2015-05-20 21:12:48 +08:00
|
|
|
bool Chunked, bool Ordered) {
|
2014-12-15 15:07:06 +08:00
|
|
|
switch (ScheduleKind) {
|
|
|
|
case OMPC_SCHEDULE_static:
|
2015-05-20 21:12:48 +08:00
|
|
|
return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
|
|
|
|
: (Ordered ? OMP_ord_static : OMP_sch_static);
|
2014-12-15 15:07:06 +08:00
|
|
|
case OMPC_SCHEDULE_dynamic:
|
2015-05-20 21:12:48 +08:00
|
|
|
return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
|
2014-12-15 15:07:06 +08:00
|
|
|
case OMPC_SCHEDULE_guided:
|
2015-05-20 21:12:48 +08:00
|
|
|
return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
|
2014-12-15 15:07:06 +08:00
|
|
|
case OMPC_SCHEDULE_runtime:
|
2015-05-20 21:12:48 +08:00
|
|
|
return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
|
|
|
|
case OMPC_SCHEDULE_auto:
|
|
|
|
return Ordered ? OMP_ord_auto : OMP_sch_auto;
|
2014-12-15 15:07:06 +08:00
|
|
|
case OMPC_SCHEDULE_unknown:
|
|
|
|
assert(!Chunked && "chunk was specified but schedule kind not known");
|
2015-05-20 21:12:48 +08:00
|
|
|
return Ordered ? OMP_ord_static : OMP_sch_static;
|
2014-12-15 15:07:06 +08:00
|
|
|
}
|
|
|
|
llvm_unreachable("Unexpected runtime schedule");
|
|
|
|
}
|
|
|
|
|
2016-03-08 00:04:49 +08:00
|
|
|
/// \brief Map the OpenMP distribute schedule to the runtime enumeration.
|
|
|
|
static OpenMPSchedType
|
|
|
|
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
|
|
|
|
// only static is allowed for dist_schedule
|
|
|
|
return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
|
|
|
|
}
|
|
|
|
|
2014-12-15 15:07:06 +08:00
|
|
|
/// Returns true when the given 'schedule' clause kind, with or without a
/// chunk, maps to the non-chunked static runtime schedule (OMP_sch_static).
/// The mapping is evaluated with Ordered=false.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  return getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false) ==
         OMP_sch_static;
}
|
|
|
|
|
2016-03-08 00:04:49 +08:00
|
|
|
/// Returns true when the given 'dist_schedule' clause kind maps to the
/// non-chunked static distribute schedule (OMP_dist_sch_static).
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  return getRuntimeSchedule(ScheduleKind, Chunked) == OMP_dist_sch_static;
}
|
|
|
|
|
|
|
|
|
2015-01-22 16:49:35 +08:00
|
|
|
/// Returns true when the given 'schedule' clause kind maps to anything other
/// than the non-chunked static runtime schedule. The mapping is evaluated
/// with Chunked=false and Ordered=false.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  const OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  // Chunked=false was passed above, so the chunked static kind is unexpected.
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  const bool IsPlainStatic = Schedule == OMP_sch_static;
  return !IsPlainStatic;
}
|
|
|
|
|
2016-05-10 17:57:36 +08:00
|
|
|
/// Combine a runtime schedule with the effect of up to two schedule-clause
/// modifiers.
///
/// The modifiers are applied in order (\p M1, then \p M2):
///  - monotonic / nonmonotonic set the modifier bits to
///    OMP_sch_modifier_monotonic / OMP_sch_modifier_nonmonotonic; if both
///    \p M1 and \p M2 are of this kind, \p M2 wins;
///  - simd turns OMP_sch_static_chunked into OMP_sch_static_balanced_chunked
///    and leaves any other schedule untouched;
///  - last / unknown have no effect.
///
/// \param Schedule Base runtime schedule.
/// \param M1       First modifier from the schedule clause.
/// \param M2       Second modifier from the schedule clause.
/// \returns The (possibly adjusted) schedule OR'ed with the modifier bits.
static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  // Both modifiers are handled identically, so share one implementation
  // instead of repeating the switch for M1 and M2.
  auto ApplyModifier = [&Schedule, &Modifier](OpenMPScheduleClauseModifier M) {
    switch (M) {
    case OMPC_SCHEDULE_MODIFIER_monotonic:
      Modifier = OMP_sch_modifier_monotonic;
      break;
    case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
      Modifier = OMP_sch_modifier_nonmonotonic;
      break;
    case OMPC_SCHEDULE_MODIFIER_simd:
      if (Schedule == OMP_sch_static_chunked)
        Schedule = OMP_sch_static_balanced_chunked;
      break;
    case OMPC_SCHEDULE_MODIFIER_last:
    case OMPC_SCHEDULE_MODIFIER_unknown:
      break;
    }
  };
  ApplyModifier(M1);
  ApplyModifier(M2);
  return Schedule | Modifier;
}
|
|
|
|
|
2017-04-26 01:52:12 +08:00
|
|
|
void CGOpenMPRuntime::emitForDispatchInit(
|
|
|
|
CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
|
const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
|
|
|
|
bool Ordered, const DispatchRTInput &DispatchValues) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2017-04-26 01:52:12 +08:00
|
|
|
OpenMPSchedType Schedule = getRuntimeSchedule(
|
|
|
|
ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
assert(Ordered ||
|
|
|
|
(Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
|
2016-05-30 21:05:14 +08:00
|
|
|
Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
|
|
|
|
Schedule != OMP_sch_static_balanced_chunked));
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
// Call __kmpc_dispatch_init(
|
|
|
|
// ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
|
|
|
|
// kmp_int[32|64] lower, kmp_int[32|64] upper,
|
|
|
|
// kmp_int[32|64] stride, kmp_int[32|64] chunk);
|
|
|
|
|
|
|
|
// If the Chunk was not specified in the clause - use default value 1.
|
2017-04-26 01:52:12 +08:00
|
|
|
llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
|
|
|
|
: CGF.Builder.getIntN(IVSize, 1);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
llvm::Value *Args[] = {
|
2016-05-10 17:57:36 +08:00
|
|
|
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
|
|
|
|
CGF.Builder.getInt32(addMonoNonMonoModifier(
|
|
|
|
Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
|
2017-04-26 01:52:12 +08:00
|
|
|
DispatchValues.LB, // Lower
|
|
|
|
DispatchValues.UB, // Upper
|
2016-05-10 17:57:36 +08:00
|
|
|
CGF.Builder.getIntN(IVSize, 1), // Stride
|
|
|
|
Chunk // Chunk
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
};
|
|
|
|
CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
|
|
|
|
}
|
2015-03-12 21:37:50 +08:00
|
|
|
|
2016-05-10 17:57:36 +08:00
|
|
|
/// Emits the runtime call that sets up a statically scheduled loop.
///
/// Nothing is emitted when \p CGF has no insertion point. \p Schedule must be
/// one of the static schedule kinds and \p Ordered must be false (both are
/// asserted).
///
/// \param ForStaticInitFunction Runtime entry that is called.
/// \param Chunk Chunk size; when null, a constant 1 of width \p IVSize is
/// passed to the runtime instead.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
    Address ST, llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;

  // Only non-ordered, static schedules are handled by this helper.
  assert(!Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  if (Chunk != nullptr) {
    // An explicit chunk must come with one of the chunked schedule kinds.
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  } else {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(IVSize, 1);
  }

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *ScheduleType =
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, M2));
  llvm::Value *Increment = CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      UpdateLocation,  // loc
      ThreadId,        // tid
      ScheduleType,    // Schedule type
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &LB
      UB.getPointer(), // &UB
      ST.getPointer(), // &Stride
      Increment,       // Incr
      Chunk            // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
|
|
|
|
SourceLocation Loc,
|
2016-05-10 17:57:36 +08:00
|
|
|
const OpenMPScheduleTy &ScheduleKind,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
unsigned IVSize, bool IVSigned,
|
|
|
|
bool Ordered, Address IL, Address LB,
|
|
|
|
Address UB, Address ST,
|
|
|
|
llvm::Value *Chunk) {
|
2016-05-10 17:57:36 +08:00
|
|
|
OpenMPSchedType ScheduleNum =
|
|
|
|
getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
|
2016-03-08 00:04:49 +08:00
|
|
|
auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
|
|
|
|
auto *ThreadId = getThreadID(CGF, Loc);
|
|
|
|
auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
|
2016-05-10 17:57:36 +08:00
|
|
|
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
|
|
|
|
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
|
|
|
|
Ordered, IL, LB, UB, ST, Chunk);
|
2016-03-08 00:04:49 +08:00
|
|
|
}
|
|
|
|
|
2016-05-10 17:57:36 +08:00
|
|
|
void CGOpenMPRuntime::emitDistributeStaticInit(
|
|
|
|
CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
|
OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
|
|
|
|
bool Ordered, Address IL, Address LB, Address UB, Address ST,
|
2016-03-08 00:04:49 +08:00
|
|
|
llvm::Value *Chunk) {
|
|
|
|
OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
|
|
|
|
auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
|
|
|
|
auto *ThreadId = getThreadID(CGF, Loc);
|
|
|
|
auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
|
2016-05-10 17:57:36 +08:00
|
|
|
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
|
|
|
|
ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
|
|
|
|
OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
|
|
|
|
UB, ST, Chunk);
|
2014-12-15 15:07:06 +08:00
|
|
|
}
|
|
|
|
|
2015-04-22 19:15:40 +08:00
|
|
|
/// Emits the call that finishes a statically scheduled loop. Nothing is
/// emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *LocArg = emitUpdateLocation(CGF, Loc);
  llvm::Value *TidArg = getThreadID(CGF, Loc);
  llvm::Value *Args[] = {LocArg, TidArg};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}
|
|
|
|
|
2015-05-20 21:12:48 +08:00
|
|
|
/// Emits the dispatch "fini" runtime call for the given induction variable
/// size and signedness. Nothing is emitted when \p CGF has no insertion
/// point.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;

  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *LocArg = emitUpdateLocation(CGF, Loc);
  llvm::Value *TidArg = getThreadID(CGF, Loc);
  llvm::Value *Args[] = {LocArg, TidArg};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}
|
|
|
|
|
2015-03-12 21:37:50 +08:00
|
|
|
/// Emits the dispatch "next" runtime call and returns its result converted
/// from a signed 32-bit integer to the AST bool type.
///
/// \p IL, \p LB, \p UB and \p ST are passed by pointer to the runtime as the
/// last-iteration flag, lower bound, upper bound and stride slots.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *LocArg = emitUpdateLocation(CGF, Loc);
  llvm::Value *TidArg = getThreadID(CGF, Loc);
  llvm::Value *Args[] = {
      LocArg,
      TidArg,
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *NextResult =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);

  // The runtime result is treated as a signed 32-bit value and narrowed to
  // bool for the caller.
  auto &Ctx = CGF.getContext();
  return CGF.EmitScalarConversion(
      NextResult, Ctx.getIntTypeForBitwidth(32, /* Signed */ true), Ctx.BoolTy,
      Loc);
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// Emits the runtime call that pushes the requested number of threads.
/// \p NumThreads is cast to a signed 32-bit integer first. Nothing is
/// emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *LocArg = emitUpdateLocation(CGF, Loc);
  llvm::Value *TidArg = getThreadID(CGF, Loc);
  llvm::Value *NumThreadsArg =
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true);
  llvm::Value *Args[] = {LocArg, TidArg, NumThreadsArg};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}
|
|
|
|
|
2015-06-18 21:40:03 +08:00
|
|
|
/// Emits the runtime call that pushes the proc_bind policy.
///
/// \p ProcBind is translated to the local ProcBindTy constant passed to the
/// runtime; OMPC_PROC_BIND_unknown is not supported. Nothing is emitted when
/// \p CGF has no insertion point.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  // Constants for proc bind value accepted by the runtime.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  };

  // Translate the clause kind into the runtime constant.
  ProcBindTy RuntimeProcBind;
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }

  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *LocArg = emitUpdateLocation(CGF, Loc);
  llvm::Value *TidArg = getThreadID(CGF, Loc);
  llvm::Value *BindArg =
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true);
  llvm::Value *Args[] = {LocArg, TidArg, BindArg};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}
|
|
|
|
|
2015-02-25 16:32:46 +08:00
|
|
|
/// Emits a call to the flush runtime entry. The list of flushed expressions
/// is ignored. Nothing is emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (CGF.HaveInsertPoint()) {
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}
|
2015-02-25 16:32:46 +08:00
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
namespace {
/// Field indexes for the kmp_task_t type. Enumerators are in field order, so
/// their order must not change.
enum KmpTaskTFields {
  /// Shared variables list.
  KmpTaskTShareds,
  /// Routine executed for the task.
  KmpTaskTRoutine,
  /// Partition id, used for untied tasks.
  KmpTaskTPartId,
  /// Function that calls destructors for private variables.
  Data1,
  /// Priority of the task.
  Data2,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound,
  /// Stride (taskloops only).
  KmpTaskTStride,
  /// Is-last-iteration flag (taskloops only).
  KmpTaskTLastIter,
  /// Reduction data (taskloops only).
  KmpTaskTReductions,
};
} // anonymous namespace
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
|
|
|
|
// FIXME: Add other entries type when they become supported.
|
|
|
|
return OffloadEntriesTargetRegion.empty();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Initialize target region entry.
|
|
|
|
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
|
|
|
|
initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
|
|
|
|
StringRef ParentName, unsigned LineNum,
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to be visible (external linkage) for the device side. Using dots in the names of the entries can therefore be problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignores unknown tokens, preventing several target regions from being implemented on the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
unsigned Order) {
|
2016-01-06 21:42:12 +08:00
|
|
|
assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
|
|
|
|
"only required for the device "
|
|
|
|
"code generation.");
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
|
[OpenMP] Add fields for flags in the offload entry descriptor.
Summary:
This patch adds two fields to the offload entry descriptor. One field is meant to signal Ctors/Dtors and `link` global variables, and the other is reserved for runtime library use.
Currently, these fields are only filled with zeros in the current code generation, but that will change when `declare target` is added.
The reason, we are adding these fields now is to make the code generation consistent with the runtime library proposal under review in https://reviews.llvm.org/D14031.
Reviewers: ABataev, hfinkel, carlo.bertolli, kkwli0, arpith-jacob, Hahnfeld
Subscribers: cfe-commits, caomhin, jholewinski
Differential Revision: https://reviews.llvm.org/D28298
llvm-svn: 291124
2017-01-06 00:02:49 +08:00
|
|
|
OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
|
|
|
|
/*Flags=*/0);
|
2016-01-06 21:42:12 +08:00
|
|
|
++OffloadingEntriesNum;
|
|
|
|
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
|
|
|
|
registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
|
|
|
|
StringRef ParentName, unsigned LineNum,
|
[OpenMP] Add fields for flags in the offload entry descriptor.
Summary:
This patch adds two fields to the offload entry descriptor. One field is meant to signal Ctors/Dtors and `link` global variables, and the other is reserved for runtime library use.
Currently, these fields are only filled with zeros in the current code generation, but that will change when `declare target` is added.
The reason, we are adding these fields now is to make the code generation consistent with the runtime library proposal under review in https://reviews.llvm.org/D14031.
Reviewers: ABataev, hfinkel, carlo.bertolli, kkwli0, arpith-jacob, Hahnfeld
Subscribers: cfe-commits, caomhin, jholewinski
Differential Revision: https://reviews.llvm.org/D28298
llvm-svn: 291124
2017-01-06 00:02:49 +08:00
|
|
|
llvm::Constant *Addr, llvm::Constant *ID,
|
|
|
|
int32_t Flags) {
|
2016-01-06 21:42:12 +08:00
|
|
|
// If we are emitting code for a target, the entry is already initialized,
|
|
|
|
// only has to be registered.
|
|
|
|
if (CGM.getLangOpts().OpenMPIsDevice) {
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
|
2016-01-06 21:42:12 +08:00
|
|
|
"Entry must exist.");
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
auto &Entry =
|
|
|
|
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
|
2016-01-06 21:42:12 +08:00
|
|
|
assert(Entry.isValid() && "Entry not initialized!");
|
|
|
|
Entry.setAddress(Addr);
|
|
|
|
Entry.setID(ID);
|
[OpenMP] Add fields for flags in the offload entry descriptor.
Summary:
This patch adds two fields to the offload entry descriptor. One field is meant to signal Ctors/Dtors and `link` global variables, and the other is reserved for runtime library use.
Currently, these fields are only filled with zeros in the current code generation, but that will change when `declare target` is added.
The reason, we are adding these fields now is to make the code generation consistent with the runtime library proposal under review in https://reviews.llvm.org/D14031.
Reviewers: ABataev, hfinkel, carlo.bertolli, kkwli0, arpith-jacob, Hahnfeld
Subscribers: cfe-commits, caomhin, jholewinski
Differential Revision: https://reviews.llvm.org/D28298
llvm-svn: 291124
2017-01-06 00:02:49 +08:00
|
|
|
Entry.setFlags(Flags);
|
2016-01-06 21:42:12 +08:00
|
|
|
return;
|
|
|
|
} else {
|
[OpenMP] Add fields for flags in the offload entry descriptor.
Summary:
This patch adds two fields to the offload entry descriptor. One field is meant to signal Ctors/Dtors and `link` global variables, and the other is reserved for runtime library use.
Currently, these fields are only filled with zeros in the current code generation, but that will change when `declare target` is added.
The reason, we are adding these fields now is to make the code generation consistent with the runtime library proposal under review in https://reviews.llvm.org/D14031.
Reviewers: ABataev, hfinkel, carlo.bertolli, kkwli0, arpith-jacob, Hahnfeld
Subscribers: cfe-commits, caomhin, jholewinski
Differential Revision: https://reviews.llvm.org/D28298
llvm-svn: 291124
2017-01-06 00:02:49 +08:00
|
|
|
OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags);
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
|
2016-01-06 21:42:12 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
unsigned DeviceID, unsigned FileID, StringRef ParentName,
|
|
|
|
unsigned LineNum) const {
|
2016-01-06 21:42:12 +08:00
|
|
|
auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
|
|
|
|
if (PerDevice == OffloadEntriesTargetRegion.end())
|
|
|
|
return false;
|
|
|
|
auto PerFile = PerDevice->second.find(FileID);
|
|
|
|
if (PerFile == PerDevice->second.end())
|
|
|
|
return false;
|
|
|
|
auto PerParentName = PerFile->second.find(ParentName);
|
|
|
|
if (PerParentName == PerFile->second.end())
|
|
|
|
return false;
|
|
|
|
auto PerLine = PerParentName->second.find(LineNum);
|
|
|
|
if (PerLine == PerParentName->second.end())
|
|
|
|
return false;
|
|
|
|
// Fail if this entry is already registered.
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
if (PerLine->second.getAddress() || PerLine->second.getID())
|
2016-01-06 21:42:12 +08:00
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
|
|
|
|
const OffloadTargetRegionEntryInfoActTy &Action) {
|
|
|
|
// Scan all target region entries and perform the provided action.
|
|
|
|
for (auto &D : OffloadEntriesTargetRegion)
|
|
|
|
for (auto &F : D.second)
|
|
|
|
for (auto &P : F.second)
|
|
|
|
for (auto &L : P.second)
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
Action(D.first, F.first, P.first(), L.first, L.second);
|
2016-01-06 21:42:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Create a Ctor/Dtor-like function whose body is emitted through
|
|
|
|
/// \a Codegen. This is used to emit the two functions that register and
|
|
|
|
/// unregister the descriptor of the current compilation unit.
|
|
|
|
static llvm::Function *
|
|
|
|
createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
|
|
|
|
const RegionCodeGenTy &Codegen) {
|
|
|
|
auto &C = CGM.getContext();
|
|
|
|
FunctionArgList Args;
|
2017-06-09 21:40:18 +08:00
|
|
|
ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
|
2016-01-06 21:42:12 +08:00
|
|
|
Args.push_back(&DummyPtr);
|
|
|
|
|
|
|
|
CodeGenFunction CGF(CGM);
|
2016-03-11 12:30:31 +08:00
|
|
|
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
|
2016-01-06 21:42:12 +08:00
|
|
|
auto FTy = CGM.getTypes().GetFunctionType(FI);
|
|
|
|
auto *Fn =
|
|
|
|
CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
|
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
|
|
|
|
Codegen(CGF);
|
|
|
|
CGF.FinishFunction();
|
|
|
|
return Fn;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Emit the offloading binary descriptor of the current compilation
/// unit together with the functions that register and unregister it.
///
/// \returns the registration function, or nullptr when generating device code
/// or when no offload entries were recorded.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // Device compilations and units without offload entries need no descriptor.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  auto &TheModule = CGM.getModule();
  auto &Ctx = CGM.getContext();

  // Offloading targets requested for this compilation. A descriptor is only
  // created when at least one was specified.
  auto &TargetTriples = CGM.getLangOpts().OMPTargetTriples;
  assert(!TargetTriples.empty() && "No OpenMP offloading devices??");

  // External symbols marking the begin and end of the host entries section.
  // They are only declared here; the linker is expected to define them.
  auto *HostEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      TheModule, HostEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_begin");
  auto *HostEntriesEnd = new llvm::GlobalVariable(
      TheModule, HostEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_end");

  // One device image record per target triple.
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder ImagesBuilder(CGM);
  auto ImagesArray = ImagesBuilder.beginArray(DeviceImageTy);

  for (const auto &Target : TargetTriples) {
    StringRef TripleStr = Target.getTriple();

    // External symbols delimiting the image of this target.
    auto *ImageBegin = new llvm::GlobalVariable(
        TheModule, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
        Twine(".omp_offloading.img_start.") + Twine(TripleStr));
    auto *ImageEnd = new llvm::GlobalVariable(
        TheModule, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
        Twine(".omp_offloading.img_end.") + Twine(TripleStr));

    auto ImageRecord = ImagesArray.beginStruct(DeviceImageTy);
    ImageRecord.add(ImageBegin);
    ImageRecord.add(ImageEnd);
    ImageRecord.add(HostEntriesBegin);
    ImageRecord.add(HostEntriesEnd);
    ImageRecord.finishAndAddTo(ImagesArray);
  }

  // Materialize the array of device images as a constant global.
  llvm::GlobalVariable *DeviceImages = ImagesArray.finishAndCreateGlobal(
      ".omp_offloading.device_images", CGM.getPointerAlign(),
      /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Pair of zero indices used to form a pointer to the first image record.
  llvm::Constant *Zero = llvm::Constant::getNullValue(CGM.Int32Ty);
  llvm::Constant *ZeroIndices[] = {Zero, Zero};

  // Build the target region descriptor: number of images, pointer to the
  // images array, and the host entries range.
  auto *BinaryDescriptorTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
  ConstantInitBuilder DescriptorBuilder(CGM);
  auto DescriptorInit = DescriptorBuilder.beginStruct(BinaryDescriptorTy);
  DescriptorInit.addInt(CGM.Int32Ty, TargetTriples.size());
  llvm::Constant *FirstImagePtr = llvm::ConstantExpr::getGetElementPtr(
      DeviceImages->getValueType(), DeviceImages, ZeroIndices);
  DescriptorInit.add(FirstImagePtr);
  DescriptorInit.add(HostEntriesBegin);
  DescriptorInit.add(HostEntriesEnd);

  auto *Desc = DescriptorInit.finishAndCreateGlobal(
      ".omp_offloading.descriptor", CGM.getPointerAlign(),
      /*isConstant=*/true);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // A dummy variable drives registration and unregistration so that the
  // existing Ctor/Dtor emission logic can be reused.
  auto *RegUnregIdent = &Ctx.Idents.get(".omp_offloading.reg_unreg_var");
  ImplicitParamDecl RegUnregVar(Ctx, Ctx.getTranslationUnitDecl(),
                                SourceLocation(), RegUnregIdent, Ctx.CharTy,
                                ImplicitParamDecl::Other);

  auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_unreg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                             Desc);
      });
  auto *RegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_reg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
                             Desc);
        // Arrange for the unregistration function to run as a global dtor.
        CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
      });

  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    auto *Key = TheModule.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(Key);
    UnRegFn->setComdat(Key);
    DeviceImages->setComdat(Key);
    Desc->setComdat(Key);
  }
  return RegFn;
}
|
|
|
|
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to be visible (external linkage) for the device side. Using dots in the names of the entries can therefore be problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignores unknown tokens, preventing several target regions from being implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
|
[OpenMP] Add fields for flags in the offload entry descriptor.
Summary:
This patch adds two fields to the offload entry descriptor. One field is meant to signal Ctors/Dtors and `link` global variables, and the other is reserved for runtime library use.
Currently, these fields are only filled with zeros in the current code generation, but that will change when `declare target` is added.
The reason, we are adding these fields now is to make the code generation consistent with the runtime library proposal under review in https://reviews.llvm.org/D14031.
Reviewers: ABataev, hfinkel, carlo.bertolli, kkwli0, arpith-jacob, Hahnfeld
Subscribers: cfe-commits, caomhin, jholewinski
Differential Revision: https://reviews.llvm.org/D28298
llvm-svn: 291124
2017-01-06 00:02:49 +08:00
|
|
|
llvm::Constant *Addr, uint64_t Size,
|
|
|
|
int32_t Flags) {
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
StringRef Name = Addr->getName();
|
2016-01-06 21:42:12 +08:00
|
|
|
auto *TgtOffloadEntryType = cast<llvm::StructType>(
|
|
|
|
CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
|
|
|
|
llvm::LLVMContext &C = CGM.getModule().getContext();
|
|
|
|
llvm::Module &M = CGM.getModule();
|
|
|
|
|
|
|
|
// Make sure the address has the right type.
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
// Create constant string with the name.
|
|
|
|
llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
|
|
|
|
|
|
|
|
llvm::GlobalVariable *Str =
|
|
|
|
new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
|
|
|
|
llvm::GlobalValue::InternalLinkage, StrPtrInit,
|
|
|
|
".omp_offloading.entry_name");
|
2016-06-15 05:02:05 +08:00
|
|
|
Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
|
2016-01-06 21:42:12 +08:00
|
|
|
llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
|
|
|
|
|
2016-11-19 16:17:24 +08:00
|
|
|
// We can't have any padding between symbols, so we need to have 1-byte
|
|
|
|
// alignment.
|
|
|
|
auto Align = CharUnits::fromQuantity(1);
|
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
// Create the entry struct.
|
2016-11-29 06:18:27 +08:00
|
|
|
ConstantInitBuilder EntryBuilder(CGM);
|
2016-11-19 16:17:24 +08:00
|
|
|
auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
|
|
|
|
EntryInit.add(AddrPtr);
|
|
|
|
EntryInit.add(StrPtr);
|
|
|
|
EntryInit.addInt(CGM.SizeTy, Size);
|
[OpenMP] Add fields for flags in the offload entry descriptor.
Summary:
This patch adds two fields to the offload entry descriptor. One field is meant to signal Ctors/Dtors and `link` global variables, and the other is reserved for runtime library use.
Currently, these fields are only filled with zeros in the current code generation, but that will change when `declare target` is added.
The reason, we are adding these fields now is to make the code generation consistent with the runtime library proposal under review in https://reviews.llvm.org/D14031.
Reviewers: ABataev, hfinkel, carlo.bertolli, kkwli0, arpith-jacob, Hahnfeld
Subscribers: cfe-commits, caomhin, jholewinski
Differential Revision: https://reviews.llvm.org/D28298
llvm-svn: 291124
2017-01-06 00:02:49 +08:00
|
|
|
EntryInit.addInt(CGM.Int32Ty, Flags);
|
|
|
|
EntryInit.addInt(CGM.Int32Ty, 0);
|
2016-11-19 16:17:24 +08:00
|
|
|
llvm::GlobalVariable *Entry =
|
|
|
|
EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
|
|
|
|
Align,
|
|
|
|
/*constant*/ true,
|
|
|
|
llvm::GlobalValue::ExternalLinkage);
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
// The entry has to be created in the section the linker expects it to be.
|
|
|
|
Entry->setSection(".omp_offloading.entries");
|
|
|
|
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
|
|
|
|
// Emit the offloading entries and metadata so that the device codegen side
|
2016-12-13 02:00:20 +08:00
|
|
|
// can easily figure out what to emit. The produced metadata looks like
|
|
|
|
// this:
|
2016-01-06 21:42:12 +08:00
|
|
|
//
|
|
|
|
// !omp_offload.info = !{!1, ...}
|
|
|
|
//
|
|
|
|
// Right now we only generate metadata for function that contain target
|
|
|
|
// regions.
|
|
|
|
|
|
|
|
// If we do not have entries, we dont need to do anything.
|
|
|
|
if (OffloadEntriesInfoManager.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
llvm::Module &M = CGM.getModule();
|
|
|
|
llvm::LLVMContext &C = M.getContext();
|
|
|
|
SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
|
|
|
|
OrderedEntries(OffloadEntriesInfoManager.size());
|
|
|
|
|
|
|
|
// Create the offloading info metadata node.
|
|
|
|
llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
|
|
|
|
|
2017-03-30 22:13:19 +08:00
|
|
|
// Auxiliary methods to create metadata values and strings.
|
2016-01-06 21:42:12 +08:00
|
|
|
auto getMDInt = [&](unsigned v) {
|
|
|
|
return llvm::ConstantAsMetadata::get(
|
|
|
|
llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
|
|
|
|
};
|
|
|
|
|
|
|
|
auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
|
|
|
|
|
|
|
|
// Create function that emits metadata for each target region entry;
|
|
|
|
auto &&TargetRegionMetadataEmitter = [&](
|
|
|
|
unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
|
|
|
|
OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
|
|
|
|
llvm::SmallVector<llvm::Metadata *, 32> Ops;
|
|
|
|
// Generate metadata for target regions. Each entry of this metadata
|
|
|
|
// contains:
|
|
|
|
// - Entry 0 -> Kind of this type of metadata (0).
|
|
|
|
// - Entry 1 -> Device ID of the file where the entry was identified.
|
|
|
|
// - Entry 2 -> File ID of the file where the entry was identified.
|
|
|
|
// - Entry 3 -> Mangled name of the function where the entry was identified.
|
|
|
|
// - Entry 4 -> Line in the file where the entry was identified.
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
// - Entry 5 -> Order the entry was created.
|
2016-01-06 21:42:12 +08:00
|
|
|
// The first element of the metadata node is the kind.
|
|
|
|
Ops.push_back(getMDInt(E.getKind()));
|
|
|
|
Ops.push_back(getMDInt(DeviceID));
|
|
|
|
Ops.push_back(getMDInt(FileID));
|
|
|
|
Ops.push_back(getMDString(ParentName));
|
|
|
|
Ops.push_back(getMDInt(Line));
|
|
|
|
Ops.push_back(getMDInt(E.getOrder()));
|
|
|
|
|
|
|
|
// Save this entry in the right position of the ordered entries array.
|
|
|
|
OrderedEntries[E.getOrder()] = &E;
|
|
|
|
|
|
|
|
// Add metadata to the named metadata node.
|
|
|
|
MD->addOperand(llvm::MDNode::get(C, Ops));
|
|
|
|
};
|
|
|
|
|
|
|
|
OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
|
|
|
|
TargetRegionMetadataEmitter);
|
|
|
|
|
|
|
|
for (auto *E : OrderedEntries) {
|
|
|
|
assert(E && "All ordered entries must exist!");
|
|
|
|
if (auto *CE =
|
|
|
|
dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
|
|
|
|
E)) {
|
|
|
|
assert(CE->getID() && CE->getAddress() &&
|
|
|
|
"Entry ID and Addr are invalid!");
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
|
2016-01-06 21:42:12 +08:00
|
|
|
} else
|
|
|
|
llvm_unreachable("Unsupported entry kind.");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Loads all the offload entries information from the host IR
|
|
|
|
/// metadata.
|
|
|
|
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
|
|
|
|
// If we are in target mode, load the metadata from the host IR. This code has
|
|
|
|
// to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
|
|
|
|
|
|
|
|
if (!CGM.getLangOpts().OpenMPIsDevice)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (CGM.getLangOpts().OMPHostIRFile.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
|
|
|
|
if (Buf.getError())
|
|
|
|
return;
|
|
|
|
|
|
|
|
llvm::LLVMContext C;
|
2016-11-13 15:00:17 +08:00
|
|
|
auto ME = expectedToErrorOrAndEmitErrors(
|
|
|
|
C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
if (ME.getError())
|
|
|
|
return;
|
|
|
|
|
|
|
|
llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
|
|
|
|
if (!MD)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (auto I : MD->operands()) {
|
|
|
|
llvm::MDNode *MN = cast<llvm::MDNode>(I);
|
|
|
|
|
|
|
|
auto getMDInt = [&](unsigned Idx) {
|
|
|
|
llvm::ConstantAsMetadata *V =
|
|
|
|
cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
|
|
|
|
return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
|
|
|
|
};
|
|
|
|
|
|
|
|
auto getMDString = [&](unsigned Idx) {
|
|
|
|
llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
|
|
|
|
return V->getString();
|
|
|
|
};
|
|
|
|
|
|
|
|
switch (getMDInt(0)) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unexpected metadata!");
|
|
|
|
break;
|
|
|
|
case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
|
|
|
|
OFFLOAD_ENTRY_INFO_TARGET_REGION:
|
|
|
|
OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
|
|
|
|
/*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
|
|
|
|
/*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
/*Order=*/getMDInt(5));
|
2016-01-06 21:42:12 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
/// Builds, on first use only, the pointer-to-function type
/// \code
/// typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
/// \endcode
/// and stores it in KmpRoutineEntryPtrQTy, together with the result of
/// converting it through CGM.getTypes() in KmpRoutineEntryPtrTy.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  // Already built by an earlier call.
  if (KmpRoutineEntryPtrTy)
    return;

  auto &Ctx = CGM.getContext();
  QualType ParamTys[] = {KmpInt32Ty, Ctx.VoidPtrTy};
  FunctionProtoType::ExtProtoInfo ProtoInfo;
  QualType RoutineFnTy = Ctx.getFunctionType(KmpInt32Ty, ParamTys, ProtoInfo);
  KmpRoutineEntryPtrQTy = Ctx.getPointerType(RoutineFnTy);
  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
}
|
|
|
|
|
2015-09-11 18:29:41 +08:00
|
|
|
/// Creates an unnamed, public, non-mutable field of type \p FieldTy (no bit
/// width, no in-class initializer, default source locations), adds it to
/// \p DC and returns the new field.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  const SourceLocation NoLoc;
  auto *TInfo = C.getTrivialTypeSourceInfo(FieldTy, NoLoc);
  auto *NewField =
      FieldDecl::Create(C, DC, NoLoc, NoLoc, /*Id=*/nullptr, FieldTy, TInfo,
                        /*BW=*/nullptr, /*Mutable=*/false,
                        /*InitStyle=*/ICIS_NoInit);
  NewField->setAccess(AS_public);
  DC->addDecl(NewField);
  return NewField;
}
|
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
/// Returns the type of an offload entry descriptor, building and caching it
/// in TgtOffloadEntryQTy on the first call. The record that is built is:
/// \code
/// struct __tgt_offload_entry{
///   void      *addr;       // Pointer to the offload entry info.
///                          // (function or global)
///   char      *name;       // Name of the function or global.
///   size_t     size;       // Size of the entry info (0 if it is a function).
///   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
///   int32_t    reserved;   // Reserved, to use by the runtime library.
/// };
/// \endcode
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  if (!TgtOffloadEntryQTy.isNull())
    return TgtOffloadEntryQTy;

  ASTContext &Ctx = CGM.getContext();
  auto *EntryRD = Ctx.buildImplicitRecord("__tgt_offload_entry");
  EntryRD->startDefinition();
  // addr, name, size.
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.VoidPtrTy);
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.getPointerType(Ctx.CharTy));
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.getSizeType());
  // flags, reserved: both are signed 32-bit integers.
  QualType Int32Ty =
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
  addFieldToRecordDecl(Ctx, EntryRD, Int32Ty);
  addFieldToRecordDecl(Ctx, EntryRD, Int32Ty);
  EntryRD->completeDefinition();

  TgtOffloadEntryQTy = Ctx.getRecordType(EntryRD);
  return TgtOffloadEntryQTy;
}
|
|
|
|
|
|
|
|
/// Returns the type of a device image descriptor, building and caching it in
/// TgtDeviceImageQTy on the first call. The record that is built is:
/// \code
/// struct __tgt_device_image{
///   void   *ImageStart;                 // Pointer to the target code start.
///   void   *ImageEnd;                   // Pointer to the target code end.
///   // We also add the host entries to the device image, as it may be useful
///   // for the target runtime to have access to that information.
///   __tgt_offload_entry  *EntriesBegin; // Begin of the table with all
///                                       // the entries.
///   __tgt_offload_entry  *EntriesEnd;   // End of the table with all the
///                                       // entries (non inclusive).
/// };
/// \endcode
QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  if (!TgtDeviceImageQTy.isNull())
    return TgtDeviceImageQTy;

  ASTContext &Ctx = CGM.getContext();
  auto *ImageRD = Ctx.buildImplicitRecord("__tgt_device_image");
  ImageRD->startDefinition();
  // ImageStart, ImageEnd.
  addFieldToRecordDecl(Ctx, ImageRD, Ctx.VoidPtrTy);
  addFieldToRecordDecl(Ctx, ImageRD, Ctx.VoidPtrTy);
  // EntriesBegin, EntriesEnd.
  QualType EntryPtrTy = Ctx.getPointerType(getTgtOffloadEntryQTy());
  addFieldToRecordDecl(Ctx, ImageRD, EntryPtrTy);
  addFieldToRecordDecl(Ctx, ImageRD, EntryPtrTy);
  ImageRD->completeDefinition();

  TgtDeviceImageQTy = Ctx.getRecordType(ImageRD);
  return TgtDeviceImageQTy;
}
|
|
|
|
|
|
|
|
/// Returns the type of the binary descriptor, building and caching it in
/// TgtBinaryDescriptorQTy on the first call. The record that is built is:
/// \code
/// struct __tgt_bin_desc{
///   int32_t              NumDevices;      // Number of devices supported.
///   __tgt_device_image   *DeviceImages;   // Arrays of device images
///                                         // (one per device).
///   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
///                                         // entries.
///   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
///                                         // entries (non inclusive).
/// };
/// \endcode
QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  if (!TgtBinaryDescriptorQTy.isNull())
    return TgtBinaryDescriptorQTy;

  ASTContext &Ctx = CGM.getContext();
  auto *DescRD = Ctx.buildImplicitRecord("__tgt_bin_desc");
  DescRD->startDefinition();
  // NumDevices.
  addFieldToRecordDecl(
      Ctx, DescRD,
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
  // DeviceImages.
  addFieldToRecordDecl(Ctx, DescRD, Ctx.getPointerType(getTgtDeviceImageQTy()));
  // EntriesBegin, EntriesEnd.
  QualType EntryPtrTy = Ctx.getPointerType(getTgtOffloadEntryQTy());
  addFieldToRecordDecl(Ctx, DescRD, EntryPtrTy);
  addFieldToRecordDecl(Ctx, DescRD, EntryPtrTy);
  DescRD->completeDefinition();

  TgtBinaryDescriptorQTy = Ctx.getRecordType(DescRD);
  return TgtBinaryDescriptorQTy;
}
|
|
|
|
|
2015-04-30 14:51:57 +08:00
|
|
|
namespace {
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
struct PrivateHelpersTy {
|
|
|
|
PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
|
|
|
|
const VarDecl *PrivateElemInit)
|
|
|
|
: Original(Original), PrivateCopy(PrivateCopy),
|
|
|
|
PrivateElemInit(PrivateElemInit) {}
|
|
|
|
const VarDecl *Original;
|
|
|
|
const VarDecl *PrivateCopy;
|
|
|
|
const VarDecl *PrivateElemInit;
|
|
|
|
};
|
|
|
|
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
|
2015-09-11 01:07:54 +08:00
|
|
|
} // anonymous namespace
|
2015-04-30 14:51:57 +08:00
|
|
|
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
/// Builds the implicit record ".kmp_privates.t" that holds one field per
/// entry of \p Privates, in the given order:
/// \code
/// struct .kmp_privates_t. {
///   /* private vars */
/// };
/// \endcode
/// Each field has the non-reference type of the entry's original variable and
/// receives that variable's 'aligned' attributes.
/// \return The completed record, or nullptr if \p Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (Privates.empty())
    return nullptr;

  auto &Ctx = CGM.getContext();
  auto *PrivatesRD = Ctx.buildImplicitRecord(".kmp_privates.t");
  PrivatesRD->startDefinition();
  for (auto &&Private : Privates) {
    auto *OrigVD = Private.second.Original;
    auto *Field = addFieldToRecordDecl(Ctx, PrivatesRD,
                                       OrigVD->getType().getNonReferenceType());
    // Carry over any alignment requested on the original variable; the range
    // is empty when the variable has no attributes.
    for (auto *Aligned : OrigVD->specific_attrs<AlignedAttr>())
      Field->addAttr(Aligned);
  }
  PrivatesRD->completeDefinition();
  return PrivatesRD;
}
|
|
|
|
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
/// Builds the implicit union 'kmp_cmplrdata_t' (a kmp_int32 overlaid with a
/// routine entry pointer) and the implicit record 'kmp_task_t' that uses it:
/// \code
/// struct kmp_task_t {
///   void *              shareds;
///   kmp_routine_entry_t routine;
///   kmp_int32           part_id;
///   kmp_cmplrdata_t     data1;
///   kmp_cmplrdata_t     data2;
///   // For taskloops additional fields:
///   kmp_uint64          lb;
///   kmp_uint64          ub;
///   kmp_int64           st;
///   kmp_int32           liter;
///   void *              reductions;
/// };
/// \endcode
/// The trailing five fields are added only when \p Kind is a taskloop
/// directive.
/// \return The completed 'kmp_task_t' record.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  auto &C = CGM.getContext();

  // union kmp_cmplrdata_t { kmp_int32; kmp_routine_entry_t; }
  auto *CmplrDataUD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  CmplrDataUD->startDefinition();
  addFieldToRecordDecl(C, CmplrDataUD, KmpInt32Ty);
  addFieldToRecordDecl(C, CmplrDataUD, KmpRoutineEntryPointerQTy);
  CmplrDataUD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(CmplrDataUD);

  auto *TaskRD = C.buildImplicitRecord("kmp_task_t");
  TaskRD->startDefinition();
  addFieldToRecordDecl(C, TaskRD, C.VoidPtrTy);               // shareds
  addFieldToRecordDecl(C, TaskRD, KmpRoutineEntryPointerQTy); // routine
  addFieldToRecordDecl(C, TaskRD, KmpInt32Ty);                // part_id
  addFieldToRecordDecl(C, TaskRD, KmpCmplrdataTy);            // data1
  addFieldToRecordDecl(C, TaskRD, KmpCmplrdataTy);            // data2

  if (!isOpenMPTaskLoopDirective(Kind)) {
    TaskRD->completeDefinition();
    return TaskRD;
  }

  // Taskloop directives need the loop bounds, stride, last-iteration flag and
  // reductions pointer as well.
  QualType KmpUInt64Ty =
      C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/false);
  QualType KmpInt64Ty =
      C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  addFieldToRecordDecl(C, TaskRD, KmpUInt64Ty); // lb
  addFieldToRecordDecl(C, TaskRD, KmpUInt64Ty); // ub
  addFieldToRecordDecl(C, TaskRD, KmpInt64Ty);  // st
  addFieldToRecordDecl(C, TaskRD, KmpInt32Ty);  // liter
  addFieldToRecordDecl(C, TaskRD, C.VoidPtrTy); // reductions
  TaskRD->completeDefinition();
  return TaskRD;
}
|
|
|
|
|
|
|
|
/// Builds the implicit record that wraps the task descriptor together with
/// the task's private variables:
/// \code
/// struct kmp_task_t_with_privates {
///   kmp_task_t task_data;
///   .kmp_privates_t. privates;
/// };
/// \endcode
/// The second field is omitted when \p Privates is empty.
/// \return The completed 'kmp_task_t_with_privates' record.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  auto &Ctx = CGM.getContext();
  auto *WrapperRD = Ctx.buildImplicitRecord("kmp_task_t_with_privates");
  WrapperRD->startDefinition();
  addFieldToRecordDecl(Ctx, WrapperRD, KmpTaskTQTy);
  // createPrivatesRecordDecl() yields nullptr when there are no privates.
  RecordDecl *PrivatesRD = createPrivatesRecordDecl(CGM, Privates);
  if (PrivatesRD)
    addFieldToRecordDecl(Ctx, WrapperRD, Ctx.getRecordType(PrivatesRD));
  WrapperRD->completeDefinition();
  return WrapperRD;
}
|
|
|
|
|
|
|
|
/// \brief Emit a proxy function which accepts kmp_task_t as the second
|
|
|
|
/// argument.
|
|
|
|
/// \code
|
|
|
|
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
|
2016-04-20 12:01:36 +08:00
|
|
|
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
/// For taskloops:
|
|
|
|
/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
|
2017-07-17 21:30:36 +08:00
|
|
|
/// tt->reductions, tt->shareds);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
/// return 0;
|
|
|
|
/// }
|
|
|
|
/// \endcode
|
|
|
|
static llvm::Value *
|
|
|
|
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
|
|
|
|
QualType KmpTaskTWithPrivatesPtrQTy,
|
2015-05-18 15:54:53 +08:00
|
|
|
QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
QualType SharedsPtrTy, llvm::Value *TaskFunction,
|
|
|
|
llvm::Value *TaskPrivatesMap) {
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto &C = CGM.getContext();
|
|
|
|
FunctionArgList Args;
|
2017-06-09 21:40:18 +08:00
|
|
|
ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
|
|
|
|
ImplicitParamDecl::Other);
|
|
|
|
ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
|
|
|
|
KmpTaskTWithPrivatesPtrQTy.withRestrict(),
|
|
|
|
ImplicitParamDecl::Other);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
Args.push_back(&GtidArg);
|
|
|
|
Args.push_back(&TaskTypeArg);
|
|
|
|
auto &TaskEntryFnInfo =
|
2016-03-11 12:30:31 +08:00
|
|
|
CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
|
|
|
|
auto *TaskEntry =
|
|
|
|
llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
|
|
|
|
".omp_task_entry.", &CGM.getModule());
|
2015-10-28 10:30:47 +08:00
|
|
|
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
CGF.disableDebugInfo();
|
|
|
|
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
|
|
|
|
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
// TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
// tt,
|
|
|
|
// For taskloops:
|
|
|
|
// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
|
|
|
|
// tt->task_data.reductions, tt->task_data.shareds);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto *GtidParam = CGF.EmitLoadOfScalar(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
|
2016-02-04 19:27:03 +08:00
|
|
|
LValue TDBase = CGF.EmitLoadOfPointerLValue(
|
|
|
|
CGF.GetAddrOfLocalVar(&TaskTypeArg),
|
|
|
|
KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
|
2015-05-18 15:54:53 +08:00
|
|
|
auto *KmpTaskTWithPrivatesQTyRD =
|
|
|
|
cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
LValue Base =
|
|
|
|
CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
|
2015-05-18 15:54:53 +08:00
|
|
|
auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
|
|
|
|
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
|
|
|
|
auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
|
2016-04-20 12:01:36 +08:00
|
|
|
auto *PartidParam = PartIdLVal.getPointer();
|
2015-05-18 15:54:53 +08:00
|
|
|
|
|
|
|
auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
|
|
|
|
auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
|
2015-04-30 14:51:57 +08:00
|
|
|
auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
2015-05-18 15:54:53 +08:00
|
|
|
CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
|
2015-04-30 14:51:57 +08:00
|
|
|
CGF.ConvertTypeForMem(SharedsPtrTy));
|
|
|
|
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
|
|
|
|
llvm::Value *PrivatesParam;
|
|
|
|
if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
|
|
|
|
auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
|
|
|
|
PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
PrivatesLVal.getPointer(), CGF.VoidPtrTy);
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
} else
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
|
|
|
|
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
|
|
|
|
TaskPrivatesMap,
|
|
|
|
CGF.Builder
|
|
|
|
.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
TDBase.getAddress(), CGF.VoidPtrTy)
|
|
|
|
.getPointer()};
|
|
|
|
SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
|
|
|
|
std::end(CommonArgs));
|
|
|
|
if (isOpenMPTaskLoopDirective(Kind)) {
|
|
|
|
auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
|
|
|
|
auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
|
|
|
|
auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
|
|
|
|
auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
|
|
|
|
auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
|
|
|
|
auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
|
|
|
|
auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
|
|
|
|
auto StLVal = CGF.EmitLValueForField(Base, *StFI);
|
|
|
|
auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
|
|
|
|
auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
|
|
|
|
auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
|
|
|
|
auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
|
2017-07-17 21:30:36 +08:00
|
|
|
auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
|
|
|
|
auto RLVal = CGF.EmitLValueForField(Base, *RFI);
|
|
|
|
auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
CallArgs.push_back(LBParam);
|
|
|
|
CallArgs.push_back(UBParam);
|
|
|
|
CallArgs.push_back(StParam);
|
|
|
|
CallArgs.push_back(LIParam);
|
2017-07-17 21:30:36 +08:00
|
|
|
CallArgs.push_back(RParam);
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
}
|
|
|
|
CallArgs.push_back(SharedsParam);
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
|
|
|
|
CGF.EmitStoreThroughLValue(
|
|
|
|
RValue::get(CGF.Builder.getInt32(/*C=*/0)),
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
CGF.FinishFunction();
|
|
|
|
return TaskEntry;
|
|
|
|
}
|
|
|
|
|
2015-05-18 15:54:53 +08:00
|
|
|
/// Emit the internal function ".omp_task_destructor." for a task record.
///
/// The generated function is declared with return type \p KmpInt32Ty and two
/// implicit parameters: a value of type \p KmpInt32Ty and a restrict-qualified
/// pointer of type \p KmpTaskTWithPrivatesPtrQTy. In its body, the second
/// field of the record \p KmpTaskTWithPrivatesQTy is addressed through the
/// pointer parameter, and a destroy cleanup is pushed for every field of that
/// nested record whose type reports a destruction kind.
///
/// \param CGM Module the function is created in.
/// \param Loc Source location attached to the implicit parameters.
/// \param KmpInt32Ty Type of the first parameter and of the return value.
/// \param KmpTaskTWithPrivatesPtrQTy Pointer type of the second parameter.
/// \param KmpTaskTWithPrivatesQTy Record type the second parameter points to.
/// \return The newly created llvm::Function.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_destructor.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
                                    DestructorFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args);

  // Dereference the task pointer parameter and step into the second field of
  // the pointed-to record; the fields to destroy live in that nested record.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Only fields whose type needs destruction get a cleanup.
    if (auto DtorKind = Field->getType().isDestructedType()) {
      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
|
|
|
|
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
/// \brief Emit a privates mapping function for correct handling of private and
|
|
|
|
/// firstprivate variables.
|
|
|
|
/// \code
|
|
|
|
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
|
|
|
|
/// **noalias priv1,..., <tyn> **noalias privn) {
|
|
|
|
/// *priv1 = &.privates.priv1;
|
|
|
|
/// ...;
|
|
|
|
/// *privn = &.privates.privn;
|
|
|
|
/// }
|
|
|
|
/// \endcode
|
|
|
|
static llvm::Value *
|
|
|
|
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
|
2015-09-29 12:30:07 +08:00
|
|
|
ArrayRef<const Expr *> PrivateVars,
|
|
|
|
ArrayRef<const Expr *> FirstprivateVars,
|
2016-05-05 16:46:22 +08:00
|
|
|
ArrayRef<const Expr *> LastprivateVars,
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
QualType PrivatesQTy,
|
2015-09-29 12:30:07 +08:00
|
|
|
ArrayRef<PrivateDataTy> Privates) {
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
auto &C = CGM.getContext();
|
|
|
|
FunctionArgList Args;
|
|
|
|
ImplicitParamDecl TaskPrivatesArg(
|
|
|
|
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
|
2017-06-09 21:40:18 +08:00
|
|
|
C.getPointerType(PrivatesQTy).withConst().withRestrict(),
|
|
|
|
ImplicitParamDecl::Other);
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
Args.push_back(&TaskPrivatesArg);
|
|
|
|
llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
|
|
|
|
unsigned Counter = 1;
|
|
|
|
for (auto *E: PrivateVars) {
|
|
|
|
Args.push_back(ImplicitParamDecl::Create(
|
2017-06-09 21:40:18 +08:00
|
|
|
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
|
|
|
|
C.getPointerType(C.getPointerType(E->getType()))
|
|
|
|
.withConst()
|
|
|
|
.withRestrict(),
|
|
|
|
ImplicitParamDecl::Other));
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
PrivateVarsPos[VD] = Counter;
|
|
|
|
++Counter;
|
|
|
|
}
|
|
|
|
for (auto *E : FirstprivateVars) {
|
|
|
|
Args.push_back(ImplicitParamDecl::Create(
|
2017-06-09 21:40:18 +08:00
|
|
|
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
|
|
|
|
C.getPointerType(C.getPointerType(E->getType()))
|
|
|
|
.withConst()
|
|
|
|
.withRestrict(),
|
|
|
|
ImplicitParamDecl::Other));
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
PrivateVarsPos[VD] = Counter;
|
|
|
|
++Counter;
|
|
|
|
}
|
2016-05-05 16:46:22 +08:00
|
|
|
for (auto *E: LastprivateVars) {
|
|
|
|
Args.push_back(ImplicitParamDecl::Create(
|
2017-06-09 21:40:18 +08:00
|
|
|
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
|
|
|
|
C.getPointerType(C.getPointerType(E->getType()))
|
|
|
|
.withConst()
|
|
|
|
.withRestrict(),
|
|
|
|
ImplicitParamDecl::Other));
|
2016-05-05 16:46:22 +08:00
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
PrivateVarsPos[VD] = Counter;
|
|
|
|
++Counter;
|
|
|
|
}
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
auto &TaskPrivatesMapFnInfo =
|
2016-03-11 12:30:31 +08:00
|
|
|
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
auto *TaskPrivatesMapTy =
|
|
|
|
CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
|
|
|
|
auto *TaskPrivatesMap = llvm::Function::Create(
|
|
|
|
TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
|
|
|
|
".omp_task_privates_map.", &CGM.getModule());
|
2015-10-28 10:30:47 +08:00
|
|
|
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
|
|
|
|
TaskPrivatesMapFnInfo);
|
Cleanup the handling of noinline function attributes, -fno-inline,
-fno-inline-functions, -O0, and optnone.
These were really, really tangled together:
- We used the noinline LLVM attribute for -fno-inline
- But not for -fno-inline-functions (breaking LTO)
- But we did use it for -finline-hint-functions (yay, LTO is happy!)
- But we didn't for -O0 (LTO is sad yet again...)
- We had weird structuring of CodeGenOpts with both an inlining
enumeration and a boolean. They interacted in weird ways and
needlessly.
- A *lot* of set smashing went on with setting these, and then got worse
when we considered optnone and other inlining-effecting attributes.
- A bunch of inline affecting attributes were managed in a completely
different place from -fno-inline.
- Even with -fno-inline we failed to put the LLVM noinline attribute
onto many generated function definitions because they didn't show up
as AST-level functions.
- If you passed -O0 but -finline-functions we would run the normal
inliner pass in LLVM despite it being in the O0 pipeline, which really
doesn't make much sense.
- Lastly, we used things like '-fno-inline' to manipulate the pass
pipeline which forced the pass pipeline to be much more
parameterizable than it really needs to be. Instead we can *just* use
the optimization level to select a pipeline and control the rest via
attributes.
Sadly, this causes a bunch of churn in tests because we don't run the
optimizer in the tests and check the contents of attribute sets. It
would be awesome if attribute sets were a bit more FileCheck friendly,
but oh well.
I think this is a significant improvement and should remove the semantic
need to change what inliner pass we run in order to comply with the
requested inlining semantics by relying completely on attributes. It
also cleans up tho optnone and related handling a bit.
One unfortunate aspect of this is that for generating alwaysinline
routines like those in OpenMP we end up removing noinline and then
adding alwaysinline. I tried a bunch of other approaches, but because we
recompute function attributes from scratch and don't have a declaration
here I couldn't find anything substantially cleaner than this.
Differential Revision: https://reviews.llvm.org/D28053
llvm-svn: 290398
2016-12-23 09:24:49 +08:00
|
|
|
TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
|
2017-05-29 13:38:20 +08:00
|
|
|
TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
|
2015-09-15 05:35:16 +08:00
|
|
|
TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
CGF.disableDebugInfo();
|
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
|
|
|
|
TaskPrivatesMapFnInfo, Args);
|
|
|
|
|
|
|
|
// *privi = &.privates.privi;
|
2016-02-04 19:27:03 +08:00
|
|
|
LValue Base = CGF.EmitLoadOfPointerLValue(
|
|
|
|
CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
|
|
|
|
TaskPrivatesArg.getType()->castAs<PointerType>());
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
|
|
|
|
Counter = 0;
|
|
|
|
for (auto *Field : PrivatesQTyRD->fields()) {
|
|
|
|
auto FieldLVal = CGF.EmitLValueForField(Base, Field);
|
|
|
|
auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
|
2016-02-04 19:27:03 +08:00
|
|
|
auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
|
|
|
|
RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
|
2015-09-10 16:12:02 +08:00
|
|
|
CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
++Counter;
|
|
|
|
}
|
|
|
|
CGF.FinishFunction();
|
|
|
|
return TaskPrivatesMap;
|
|
|
|
}
|
|
|
|
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
static int array_pod_sort_comparator(const PrivateDataTy *P1,
|
|
|
|
const PrivateDataTy *P2) {
|
2015-04-30 14:51:57 +08:00
|
|
|
return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
|
|
|
|
}
|
|
|
|
|
2016-05-05 16:46:22 +08:00
|
|
|
/// Emit initialization for private variables in task-based directives.
|
2016-05-10 18:36:51 +08:00
|
|
|
static void emitPrivatesInit(CodeGenFunction &CGF,
|
2016-05-05 16:46:22 +08:00
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
Address KmpTaskSharedsPtr, LValue TDBase,
|
|
|
|
const RecordDecl *KmpTaskTWithPrivatesQTyRD,
|
|
|
|
QualType SharedsTy, QualType SharedsPtrTy,
|
|
|
|
const OMPTaskDataTy &Data,
|
|
|
|
ArrayRef<PrivateDataTy> Privates, bool ForDup) {
|
|
|
|
auto &C = CGF.getContext();
|
|
|
|
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
|
|
|
|
LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
|
|
|
|
LValue SrcBase;
|
|
|
|
if (!Data.FirstprivateVars.empty()) {
|
|
|
|
SrcBase = CGF.MakeAddrLValue(
|
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
|
|
|
|
SharedsTy);
|
|
|
|
}
|
|
|
|
CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
|
|
|
|
cast<CapturedStmt>(*D.getAssociatedStmt()));
|
|
|
|
FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
|
|
|
|
for (auto &&Pair : Privates) {
|
|
|
|
auto *VD = Pair.second.PrivateCopy;
|
|
|
|
auto *Init = VD->getAnyInitializer();
|
|
|
|
if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
|
|
|
|
!CGF.isTrivialInitializer(Init)))) {
|
2016-05-10 18:36:51 +08:00
|
|
|
LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
|
2016-05-05 16:46:22 +08:00
|
|
|
if (auto *Elem = Pair.second.PrivateElemInit) {
|
|
|
|
auto *OriginalVD = Pair.second.Original;
|
|
|
|
auto *SharedField = CapturesInfo.lookup(OriginalVD);
|
|
|
|
auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
|
|
|
|
SharedRefLValue = CGF.MakeAddrLValue(
|
|
|
|
Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
|
2017-05-19 01:07:11 +08:00
|
|
|
SharedRefLValue.getType(),
|
|
|
|
LValueBaseInfo(AlignmentSource::Decl,
|
|
|
|
SharedRefLValue.getBaseInfo().getMayAlias()));
|
2016-05-05 16:46:22 +08:00
|
|
|
QualType Type = OriginalVD->getType();
|
|
|
|
if (Type->isArrayType()) {
|
|
|
|
// Initialize firstprivate array.
|
|
|
|
if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
|
|
|
|
// Perform simple memcpy.
|
|
|
|
CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
|
|
|
|
SharedRefLValue.getAddress(), Type);
|
|
|
|
} else {
|
|
|
|
// Initialize firstprivate array using element-by-element
|
2017-03-30 22:13:19 +08:00
|
|
|
// initialization.
|
2016-05-05 16:46:22 +08:00
|
|
|
CGF.EmitOMPAggregateAssign(
|
|
|
|
PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
|
|
|
|
[&CGF, Elem, Init, &CapturesInfo](Address DestElement,
|
|
|
|
Address SrcElement) {
|
|
|
|
// Clean up any temporaries needed by the initialization.
|
|
|
|
CodeGenFunction::OMPPrivateScope InitScope(CGF);
|
|
|
|
InitScope.addPrivate(
|
|
|
|
Elem, [SrcElement]() -> Address { return SrcElement; });
|
|
|
|
(void)InitScope.Privatize();
|
|
|
|
// Emit initialization for single element.
|
|
|
|
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
|
|
|
|
CGF, &CapturesInfo);
|
|
|
|
CGF.EmitAnyExprToMem(Init, DestElement,
|
|
|
|
Init->getType().getQualifiers(),
|
|
|
|
/*IsInitializer=*/false);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
CodeGenFunction::OMPPrivateScope InitScope(CGF);
|
|
|
|
InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
|
|
|
|
return SharedRefLValue.getAddress();
|
|
|
|
});
|
|
|
|
(void)InitScope.Privatize();
|
|
|
|
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
|
|
|
|
CGF.EmitExprAsInit(Init, VD, PrivateLValue,
|
|
|
|
/*capturedByInit=*/false);
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
|
|
|
|
}
|
|
|
|
++FI;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check if duplication function is required for taskloops.
|
|
|
|
static bool checkInitIsRequired(CodeGenFunction &CGF,
|
|
|
|
ArrayRef<PrivateDataTy> Privates) {
|
|
|
|
bool InitRequired = false;
|
|
|
|
for (auto &&Pair : Privates) {
|
|
|
|
auto *VD = Pair.second.PrivateCopy;
|
|
|
|
auto *Init = VD->getAnyInitializer();
|
|
|
|
InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
|
|
|
|
!CGF.isTrivialInitializer(Init));
|
|
|
|
}
|
|
|
|
return InitRequired;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Emit task_dup function (for initialization of
|
|
|
|
/// private/firstprivate/lastprivate vars and last_iter flag)
|
|
|
|
/// \code
|
|
|
|
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
|
|
|
|
/// lastpriv) {
|
|
|
|
/// // setup lastprivate flag
|
|
|
|
/// task_dst->last = lastpriv;
|
|
|
|
/// // could be constructor calls here...
|
|
|
|
/// }
|
|
|
|
/// \endcode
|
|
|
|
static llvm::Value *
|
|
|
|
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
|
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
QualType KmpTaskTWithPrivatesPtrQTy,
|
|
|
|
const RecordDecl *KmpTaskTWithPrivatesQTyRD,
|
|
|
|
const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
|
|
|
|
QualType SharedsPtrTy, const OMPTaskDataTy &Data,
|
|
|
|
ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
|
|
|
|
auto &C = CGM.getContext();
|
|
|
|
FunctionArgList Args;
|
2017-06-09 21:40:18 +08:00
|
|
|
ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
|
|
|
|
KmpTaskTWithPrivatesPtrQTy,
|
|
|
|
ImplicitParamDecl::Other);
|
|
|
|
ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
|
|
|
|
KmpTaskTWithPrivatesPtrQTy,
|
|
|
|
ImplicitParamDecl::Other);
|
|
|
|
ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
|
|
|
|
ImplicitParamDecl::Other);
|
2016-05-05 16:46:22 +08:00
|
|
|
Args.push_back(&DstArg);
|
|
|
|
Args.push_back(&SrcArg);
|
|
|
|
Args.push_back(&LastprivArg);
|
|
|
|
auto &TaskDupFnInfo =
|
|
|
|
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
|
|
|
|
auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
|
|
|
|
auto *TaskDup =
|
|
|
|
llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
|
|
|
|
".omp_task_dup.", &CGM.getModule());
|
|
|
|
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
|
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
CGF.disableDebugInfo();
|
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
|
|
|
|
|
|
|
|
LValue TDBase = CGF.EmitLoadOfPointerLValue(
|
|
|
|
CGF.GetAddrOfLocalVar(&DstArg),
|
|
|
|
KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
|
|
|
|
// task_dst->liter = lastpriv;
|
|
|
|
if (WithLastIter) {
|
|
|
|
auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
|
|
|
|
LValue Base = CGF.EmitLValueForField(
|
|
|
|
TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
|
|
|
|
LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
|
|
|
|
llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
|
|
|
|
CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
|
|
|
|
CGF.EmitStoreOfScalar(Lastpriv, LILVal);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emit initial values for private copies (if any).
|
|
|
|
assert(!Privates.empty());
|
|
|
|
Address KmpTaskSharedsPtr = Address::invalid();
|
|
|
|
if (!Data.FirstprivateVars.empty()) {
|
|
|
|
LValue TDBase = CGF.EmitLoadOfPointerLValue(
|
|
|
|
CGF.GetAddrOfLocalVar(&SrcArg),
|
|
|
|
KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
|
|
|
|
LValue Base = CGF.EmitLValueForField(
|
|
|
|
TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
|
|
|
|
KmpTaskSharedsPtr = Address(
|
|
|
|
CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
|
|
|
|
Base, *std::next(KmpTaskTQTyRD->field_begin(),
|
|
|
|
KmpTaskTShareds)),
|
|
|
|
Loc),
|
|
|
|
CGF.getNaturalTypeAlignment(SharedsTy));
|
|
|
|
}
|
2016-05-10 18:36:51 +08:00
|
|
|
emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
|
|
|
|
SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
|
2016-05-05 16:46:22 +08:00
|
|
|
CGF.FinishFunction();
|
|
|
|
return TaskDup;
|
|
|
|
}
|
|
|
|
|
2016-05-10 18:36:51 +08:00
|
|
|
/// Checks if destructor function is required to be generated.
|
|
|
|
/// \return true if cleanups are required, false otherwise.
|
|
|
|
static bool
|
|
|
|
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
|
|
|
|
bool NeedsCleanup = false;
|
|
|
|
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
|
|
|
|
auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
|
|
|
|
for (auto *FD : PrivateRD->fields()) {
|
|
|
|
NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
|
|
|
|
if (NeedsCleanup)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return NeedsCleanup;
|
|
|
|
}
|
|
|
|
|
2016-04-28 17:23:51 +08:00
|
|
|
CGOpenMPRuntime::TaskResultTy
|
|
|
|
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
llvm::Value *TaskFunction, QualType SharedsTy,
|
|
|
|
Address Shareds, const OMPTaskDataTy &Data) {
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto &C = CGM.getContext();
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
llvm::SmallVector<PrivateDataTy, 4> Privates;
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
// Aggregate privates and sort them by the alignment.
|
2016-04-28 17:23:51 +08:00
|
|
|
auto I = Data.PrivateCopies.begin();
|
|
|
|
for (auto *E : Data.PrivateVars) {
|
2015-04-30 14:51:57 +08:00
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
Privates.push_back(std::make_pair(
|
2015-09-11 18:29:41 +08:00
|
|
|
C.getDeclAlign(VD),
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
|
|
|
|
/*PrivateElemInit=*/nullptr)));
|
2015-04-30 14:51:57 +08:00
|
|
|
++I;
|
|
|
|
}
|
2016-04-28 17:23:51 +08:00
|
|
|
I = Data.FirstprivateCopies.begin();
|
|
|
|
auto IElemInitRef = Data.FirstprivateInits.begin();
|
|
|
|
for (auto *E : Data.FirstprivateVars) {
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
Privates.push_back(std::make_pair(
|
2015-09-11 18:29:41 +08:00
|
|
|
C.getDeclAlign(VD),
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
PrivateHelpersTy(
|
|
|
|
VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
|
2016-02-19 06:34:54 +08:00
|
|
|
++I;
|
|
|
|
++IElemInitRef;
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
}
|
2016-05-05 16:46:22 +08:00
|
|
|
I = Data.LastprivateCopies.begin();
|
|
|
|
for (auto *E : Data.LastprivateVars) {
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
Privates.push_back(std::make_pair(
|
|
|
|
C.getDeclAlign(VD),
|
|
|
|
PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
|
|
|
|
/*PrivateElemInit=*/nullptr)));
|
|
|
|
++I;
|
|
|
|
}
|
2015-04-30 14:51:57 +08:00
|
|
|
llvm::array_pod_sort(Privates.begin(), Privates.end(),
|
|
|
|
array_pod_sort_comparator);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
|
|
|
|
// Build type kmp_routine_entry_t (if not built yet).
|
|
|
|
emitKmpRoutineEntryT(KmpInt32Ty);
|
2015-05-18 15:54:53 +08:00
|
|
|
// Build type kmp_task_t (if not built yet).
|
|
|
|
if (KmpTaskTQTy.isNull()) {
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
|
|
|
|
CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
|
2015-05-18 15:54:53 +08:00
|
|
|
}
|
|
|
|
auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// Build particular struct kmp_task_t for the given task.
|
2015-05-18 15:54:53 +08:00
|
|
|
auto *KmpTaskTWithPrivatesQTyRD =
|
|
|
|
createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
|
|
|
|
auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
|
|
|
|
QualType KmpTaskTWithPrivatesPtrQTy =
|
|
|
|
C.getPointerType(KmpTaskTWithPrivatesQTy);
|
|
|
|
auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
|
|
|
|
auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
|
2016-01-26 20:20:39 +08:00
|
|
|
auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
QualType SharedsPtrTy = C.getPointerType(SharedsTy);
|
|
|
|
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
// Emit initial values for private copies (if any).
|
|
|
|
llvm::Value *TaskPrivatesMap = nullptr;
|
|
|
|
auto *TaskPrivatesMapTy =
|
2017-03-17 02:55:46 +08:00
|
|
|
std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
if (!Privates.empty()) {
|
|
|
|
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
|
2016-05-05 16:46:22 +08:00
|
|
|
TaskPrivatesMap = emitTaskPrivateMappingFunction(
|
|
|
|
CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
|
|
|
|
FI->getType(), Privates);
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
TaskPrivatesMap, TaskPrivatesMapTy);
|
|
|
|
} else {
|
|
|
|
TaskPrivatesMap = llvm::ConstantPointerNull::get(
|
|
|
|
cast<llvm::PointerType>(TaskPrivatesMapTy));
|
|
|
|
}
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
|
|
|
|
// kmp_task_t *tt);
|
2015-05-18 15:54:53 +08:00
|
|
|
auto *TaskEntry = emitProxyTaskFunction(
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
|
|
|
|
KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
|
|
|
|
TaskPrivatesMap);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
|
|
|
|
// Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
|
|
|
|
// kmp_routine_entry_t *task_entry);
|
|
|
|
// Task flags. Format is taken from
|
|
|
|
// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
|
|
|
|
// description of kmp_tasking_flags struct.
|
2016-05-10 20:21:02 +08:00
|
|
|
enum {
|
|
|
|
TiedFlag = 0x1,
|
|
|
|
FinalFlag = 0x2,
|
|
|
|
DestructorsFlag = 0x8,
|
|
|
|
PriorityFlag = 0x20
|
|
|
|
};
|
2016-04-28 17:23:51 +08:00
|
|
|
unsigned Flags = Data.Tied ? TiedFlag : 0;
|
2016-05-10 18:36:51 +08:00
|
|
|
bool NeedsCleanup = false;
|
|
|
|
if (!Privates.empty()) {
|
|
|
|
NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
|
|
|
|
if (NeedsCleanup)
|
|
|
|
Flags = Flags | DestructorsFlag;
|
|
|
|
}
|
2016-05-10 20:21:02 +08:00
|
|
|
if (Data.Priority.getInt())
|
|
|
|
Flags = Flags | PriorityFlag;
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto *TaskFlags =
|
2016-04-28 17:23:51 +08:00
|
|
|
Data.Final.getPointer()
|
|
|
|
? CGF.Builder.CreateSelect(Data.Final.getPointer(),
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
CGF.Builder.getInt32(FinalFlag),
|
|
|
|
CGF.Builder.getInt32(/*C=*/0))
|
2016-04-28 17:23:51 +08:00
|
|
|
: CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
|
2015-11-24 21:01:44 +08:00
|
|
|
auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
|
2015-10-08 17:10:53 +08:00
|
|
|
llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
|
|
|
|
getThreadID(CGF, Loc), TaskFlags,
|
|
|
|
KmpTaskTWithPrivatesTySize, SharedsSize,
|
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
TaskEntry, KmpRoutineEntryPtrTy)};
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto *NewTask = CGF.EmitRuntimeCall(
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
|
2015-05-18 15:54:53 +08:00
|
|
|
auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
NewTask, KmpTaskTWithPrivatesPtrTy);
|
|
|
|
LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
|
|
|
|
KmpTaskTWithPrivatesQTy);
|
|
|
|
LValue TDBase =
|
|
|
|
CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to the destructors function to the field destructors of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// Fill the data in the resulting kmp_task_t record.
|
|
|
|
// Copy shareds if there are any.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address KmpTaskSharedsPtr = Address::invalid();
|
2015-05-18 15:54:53 +08:00
|
|
|
if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
|
2015-09-10 16:12:02 +08:00
|
|
|
KmpTaskSharedsPtr =
|
|
|
|
Address(CGF.EmitLoadOfScalar(
|
|
|
|
CGF.EmitLValueForField(
|
|
|
|
TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
|
|
|
|
KmpTaskTShareds)),
|
|
|
|
Loc),
|
|
|
|
CGF.getNaturalTypeAlignment(SharedsTy));
|
2015-04-30 14:51:57 +08:00
|
|
|
CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
|
2015-05-18 15:54:53 +08:00
|
|
|
}
|
2015-04-30 14:51:57 +08:00
|
|
|
// Emit initial values for private copies (if any).
|
2016-05-05 16:46:22 +08:00
|
|
|
TaskResultTy Result;
|
2015-04-30 14:51:57 +08:00
|
|
|
if (!Privates.empty()) {
|
2016-05-10 18:36:51 +08:00
|
|
|
emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
|
|
|
|
SharedsTy, SharedsPtrTy, Data, Privates,
|
|
|
|
/*ForDup=*/false);
|
2016-05-05 16:46:22 +08:00
|
|
|
if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
|
|
|
|
(!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
|
|
|
|
Result.TaskDupFn = emitTaskDupFunction(
|
|
|
|
CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
|
|
|
|
KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
|
|
|
|
/*WithLastIter=*/!Data.LastprivateVars.empty());
|
2015-04-30 14:51:57 +08:00
|
|
|
}
|
|
|
|
}
|
2016-05-30 17:06:50 +08:00
|
|
|
// Fields of union "kmp_cmplrdata_t" for destructors and priority.
|
|
|
|
enum { Priority = 0, Destructors = 1 };
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to the destructors function to the field destructors of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// Provide pointer to function with destructors for privates.
|
2016-05-30 17:06:50 +08:00
|
|
|
auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
|
|
|
|
auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
|
|
|
|
if (NeedsCleanup) {
|
|
|
|
llvm::Value *DestructorFn = emitDestructorsFunction(
|
|
|
|
CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
|
|
|
|
KmpTaskTWithPrivatesQTy);
|
|
|
|
LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
|
|
|
|
LValue DestructorsLV = CGF.EmitLValueForField(
|
|
|
|
Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
|
|
|
|
CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
DestructorFn, KmpRoutineEntryPtrTy),
|
|
|
|
DestructorsLV);
|
|
|
|
}
|
|
|
|
// Set priority.
|
|
|
|
if (Data.Priority.getInt()) {
|
|
|
|
LValue Data2LV = CGF.EmitLValueForField(
|
|
|
|
TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
|
|
|
|
LValue PriorityLV = CGF.EmitLValueForField(
|
|
|
|
Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
|
|
|
|
CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
|
|
|
|
}
|
2016-04-28 17:23:51 +08:00
|
|
|
Result.NewTask = NewTask;
|
|
|
|
Result.TaskEntry = TaskEntry;
|
|
|
|
Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
|
|
|
|
Result.TDBase = TDBase;
|
|
|
|
Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
|
|
|
|
return Result;
|
|
|
|
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
llvm::Value *TaskFunction,
|
|
|
|
QualType SharedsTy, Address Shareds,
|
|
|
|
const Expr *IfCond,
|
|
|
|
const OMPTaskDataTy &Data) {
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The following code is generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
|
|
|
|
2016-04-28 17:23:51 +08:00
|
|
|
TaskResultTy Result =
|
|
|
|
emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
|
|
|
|
llvm::Value *NewTask = Result.NewTask;
|
|
|
|
llvm::Value *TaskEntry = Result.TaskEntry;
|
|
|
|
llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
|
|
|
|
LValue TDBase = Result.TDBase;
|
|
|
|
RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The following code is generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
auto &C = CGM.getContext();
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// Process list of dependences.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address DependenciesArray = Address::invalid();
|
2016-04-28 17:23:51 +08:00
|
|
|
unsigned NumDependencies = Data.Dependences.size();
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
if (NumDependencies) {
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// Dependence kind for RTL.
|
2015-11-23 21:33:42 +08:00
|
|
|
enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
|
|
|
|
RecordDecl *KmpDependInfoRD;
|
2015-10-08 17:10:53 +08:00
|
|
|
QualType FlagsTy =
|
|
|
|
C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
|
|
|
|
if (KmpDependInfoTy.isNull()) {
|
|
|
|
KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
|
|
|
|
KmpDependInfoRD->startDefinition();
|
|
|
|
addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
|
|
|
|
addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
|
|
|
|
addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
|
|
|
|
KmpDependInfoRD->completeDefinition();
|
|
|
|
KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
|
2016-04-28 17:23:51 +08:00
|
|
|
} else
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// Define type kmp_depend_info[<Dependences.size()>];
|
|
|
|
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
ArrayType::Normal, /*IndexTypeQuals=*/0);
|
|
|
|
// kmp_depend_info[<Dependences.size()>] deps;
|
2016-04-20 12:01:36 +08:00
|
|
|
DependenciesArray =
|
|
|
|
CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
for (unsigned i = 0; i < NumDependencies; ++i) {
|
2016-04-28 17:23:51 +08:00
|
|
|
const Expr *E = Data.Dependences[i].second;
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto Addr = CGF.EmitLValue(E);
|
2015-08-31 15:32:19 +08:00
|
|
|
llvm::Value *Size;
|
|
|
|
QualType Ty = E->getType();
|
|
|
|
if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
|
|
|
|
LValue UpAddrLVal =
|
|
|
|
CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
|
|
|
|
llvm::Value *UpAddr =
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
|
2015-08-31 15:32:19 +08:00
|
|
|
llvm::Value *LowIntPtr =
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
|
2015-08-31 15:32:19 +08:00
|
|
|
llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
|
|
|
|
Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
|
2015-10-08 17:10:53 +08:00
|
|
|
} else
|
2016-01-26 20:20:39 +08:00
|
|
|
Size = CGF.getTypeSize(Ty);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto Base = CGF.MakeAddrLValue(
|
|
|
|
CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If the directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must additionally be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
KmpDependInfoTy);
|
|
|
|
// deps[i].base_addr = &<Dependences[i].second>;
|
|
|
|
auto BaseAddrLVal = CGF.EmitLValueForField(
|
|
|
|
Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.EmitStoreOfScalar(
|
|
|
|
CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
|
|
|
|
BaseAddrLVal);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If the directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must additionally be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// deps[i].len = sizeof(<Dependences[i].second>);
|
|
|
|
auto LenLVal = CGF.EmitLValueForField(
|
|
|
|
Base, *std::next(KmpDependInfoRD->field_begin(), Len));
|
|
|
|
CGF.EmitStoreOfScalar(Size, LenLVal);
|
|
|
|
// deps[i].flags = <Dependences[i].first>;
|
|
|
|
RTLDependenceKindTy DepKind;
|
2016-04-28 17:23:51 +08:00
|
|
|
switch (Data.Dependences[i].first) {
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If the directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must additionally be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
case OMPC_DEPEND_in:
|
|
|
|
DepKind = DepIn;
|
|
|
|
break;
|
2015-11-23 21:33:42 +08:00
|
|
|
// Out and InOut dependencies must use the same code.
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If the directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must additionally be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
case OMPC_DEPEND_out:
|
|
|
|
case OMPC_DEPEND_inout:
|
|
|
|
DepKind = DepInOut;
|
|
|
|
break;
|
2015-12-18 13:05:56 +08:00
|
|
|
case OMPC_DEPEND_source:
|
2015-12-23 18:27:45 +08:00
|
|
|
case OMPC_DEPEND_sink:
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If the directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must additionally be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
case OMPC_DEPEND_unknown:
|
|
|
|
llvm_unreachable("Unknown task dependence type");
|
|
|
|
}
|
|
|
|
auto FlagsLVal = CGF.EmitLValueForField(
|
|
|
|
Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
|
|
|
|
CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
|
|
|
|
FlagsLVal);
|
|
|
|
}
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If the directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must additionally be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
CGF.VoidPtrTy);
|
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
|
|
|
|
// libcall.
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If the directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must additionally be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
|
|
|
|
// kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
|
|
|
|
// list is not empty
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
auto *ThreadID = getThreadID(CGF, Loc);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If a task directive has an associated 'depend' clause, then the function kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If the directive also has an associated 'if' clause, then the function void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must additionally be called before the call to __kmpc_omp_task_begin_if0().
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
auto *UpLoc = emitUpdateLocation(CGF, Loc);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
|
|
|
|
llvm::Value *DepTaskArgs[7];
|
|
|
|
if (NumDependencies) {
|
|
|
|
DepTaskArgs[0] = UpLoc;
|
|
|
|
DepTaskArgs[1] = ThreadID;
|
|
|
|
DepTaskArgs[2] = NewTask;
|
|
|
|
DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
|
|
|
|
DepTaskArgs[4] = DependenciesArray.getPointer();
|
|
|
|
DepTaskArgs[5] = CGF.Builder.getInt32(0);
|
|
|
|
DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
|
|
|
|
}
|
2017-01-14 02:55:32 +08:00
|
|
|
auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
|
|
|
|
&TaskArgs,
|
2016-03-29 13:34:15 +08:00
|
|
|
&DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
|
2016-04-28 17:23:51 +08:00
|
|
|
if (!Data.Tied) {
|
2016-04-20 12:01:36 +08:00
|
|
|
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
|
|
|
|
auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
|
|
|
|
CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
|
|
|
|
}
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
if (NumDependencies) {
|
2016-03-29 13:34:15 +08:00
|
|
|
CGF.EmitRuntimeCall(
|
2016-04-20 12:01:36 +08:00
|
|
|
createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
} else {
|
2016-04-20 12:01:36 +08:00
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
TaskArgs);
|
|
|
|
}
|
2016-04-20 12:01:36 +08:00
|
|
|
// Check if parent region is untied and build return for untied task;
|
|
|
|
if (auto *Region =
|
|
|
|
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
|
|
|
|
Region->emitUntiedSwitch(CGF);
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
};
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
|
|
|
|
llvm::Value *DepWaitTaskArgs[6];
|
|
|
|
if (NumDependencies) {
|
|
|
|
DepWaitTaskArgs[0] = UpLoc;
|
|
|
|
DepWaitTaskArgs[1] = ThreadID;
|
|
|
|
DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
|
|
|
|
DepWaitTaskArgs[3] = DependenciesArray.getPointer();
|
|
|
|
DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
|
|
|
|
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
|
|
|
|
}
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
|
|
|
|
NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
|
|
|
|
PrePostActionTy &) {
|
|
|
|
auto &RT = CGF.CGM.getOpenMPRuntime();
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
CodeGenFunction::RunCleanupsScope LocalScope(CGF);
|
|
|
|
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
|
|
|
|
// ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
|
|
|
|
// is specified.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
if (NumDependencies)
|
2016-03-29 13:34:15 +08:00
|
|
|
CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
DepWaitTaskArgs);
|
2016-03-29 13:34:15 +08:00
|
|
|
// Call proxy_task_entry(gtid, new_task);
|
|
|
|
auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
|
|
|
|
CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
|
|
|
llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
|
|
|
|
CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
|
|
|
|
};
|
|
|
|
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_task_t *new_task);
|
|
|
|
// Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
|
|
|
|
// kmp_task_t *new_task);
|
2016-03-29 13:34:15 +08:00
|
|
|
RegionCodeGenTy RCG(CodeGen);
|
|
|
|
CommonActionTy Action(
|
|
|
|
RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
|
|
|
|
RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
|
|
|
|
RCG.setAction(Action);
|
|
|
|
RCG(CGF);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
};
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
|
2016-03-29 13:34:15 +08:00
|
|
|
if (IfCond)
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
|
2016-03-29 13:34:15 +08:00
|
|
|
else {
|
|
|
|
RegionCodeGenTy ThenRCG(ThenCodeGen);
|
|
|
|
ThenRCG(CGF);
|
2016-03-28 20:58:34 +08:00
|
|
|
}
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
}
|
|
|
|
|
2016-04-28 17:23:51 +08:00
|
|
|
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
|
const OMPLoopDirective &D,
|
|
|
|
llvm::Value *TaskFunction,
|
|
|
|
QualType SharedsTy, Address Shareds,
|
|
|
|
const Expr *IfCond,
|
|
|
|
const OMPTaskDataTy &Data) {
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2016-04-28 17:23:51 +08:00
|
|
|
TaskResultTy Result =
|
|
|
|
emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
// NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
|
|
|
|
// libcall.
|
|
|
|
// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
|
|
|
|
// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
|
|
|
|
// sched, kmp_uint64 grainsize, void *task_dup);
|
|
|
|
llvm::Value *ThreadID = getThreadID(CGF, Loc);
|
|
|
|
llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
|
|
|
|
llvm::Value *IfVal;
|
|
|
|
if (IfCond) {
|
|
|
|
IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
|
|
|
|
/*isSigned=*/true);
|
|
|
|
} else
|
|
|
|
IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
|
|
|
|
|
|
|
|
LValue LBLVal = CGF.EmitLValueForField(
|
2016-04-28 17:23:51 +08:00
|
|
|
Result.TDBase,
|
|
|
|
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
auto *LBVar =
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
|
|
|
|
CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
|
|
|
|
/*IsInitializer=*/true);
|
|
|
|
LValue UBLVal = CGF.EmitLValueForField(
|
2016-04-28 17:23:51 +08:00
|
|
|
Result.TDBase,
|
|
|
|
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
auto *UBVar =
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
|
|
|
|
CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
|
|
|
|
/*IsInitializer=*/true);
|
|
|
|
LValue StLVal = CGF.EmitLValueForField(
|
2016-04-28 17:23:51 +08:00
|
|
|
Result.TDBase,
|
|
|
|
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
auto *StVar =
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
|
|
|
|
CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
|
|
|
|
/*IsInitializer=*/true);
|
2017-07-17 21:30:36 +08:00
|
|
|
// Store reductions address.
|
|
|
|
LValue RedLVal = CGF.EmitLValueForField(
|
|
|
|
Result.TDBase,
|
|
|
|
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
|
|
|
|
if (Data.Reductions)
|
|
|
|
CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
|
|
|
|
else {
|
|
|
|
CGF.EmitNullInitialization(RedLVal.getAddress(),
|
|
|
|
CGF.getContext().VoidPtrTy);
|
|
|
|
}
|
[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop'
directive.
OpenMP 4.5 defines 'taskloop' directive and 2 additional clauses
'grainsize' and 'num_tasks' for this directive. Patch adds codegen for
these clauses.
These clauses are generated as arguments of the '__kmpc_taskloop'
libcall and are encoded the following way:
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
If 'grainsize' is specified, 'sched' argument must be set to '1' and
'grainsize' argument must be set to the value of the 'grainsize' clause.
If 'num_tasks' is specified, 'sched' argument must be set to '2' and
'grainsize' argument must be set to the value of the 'num_tasks' clause.
It is possible because these 2 clauses are mutually exclusive and can't
be used at the same time on the same directive.
If none of these clauses is specified, 'sched' argument must be set to
'0'.
llvm-svn: 267862
2016-04-28 17:15:06 +08:00
|
|
|
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
llvm::Value *TaskArgs[] = {
|
2017-07-13 02:09:32 +08:00
|
|
|
UpLoc,
|
|
|
|
ThreadID,
|
|
|
|
Result.NewTask,
|
|
|
|
IfVal,
|
|
|
|
LBLVal.getPointer(),
|
|
|
|
UBLVal.getPointer(),
|
|
|
|
CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
|
|
|
|
llvm::ConstantInt::getNullValue(
|
|
|
|
CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
|
[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop'
directive.
OpenMP 4.5 defines 'taskloop' directive and 2 additional clauses
'grainsize' and 'num_tasks' for this directive. Patch adds codegen for
these clauses.
These clauses are generated as arguments of the '__kmpc_taskloop'
libcall and are encoded the following way:
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
If 'grainsize' is specified, 'sched' argument must be set to '1' and
'grainsize' argument must be set to the value of the 'grainsize' clause.
If 'num_tasks' is specified, 'sched' argument must be set to '2' and
'grainsize' argument must be set to the value of the 'num_tasks' clause.
It is possible because these 2 clauses are mutually exclusive and can't
be used at the same time on the same directive.
If none of these clauses is specified, 'sched' argument must be set to
'0'.
llvm-svn: 267862
2016-04-28 17:15:06 +08:00
|
|
|
llvm::ConstantInt::getSigned(
|
2016-04-28 17:23:51 +08:00
|
|
|
CGF.IntTy, Data.Schedule.getPointer()
|
|
|
|
? Data.Schedule.getInt() ? NumTasks : Grainsize
|
[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop'
directive.
OpenMP 4.5 defines 'taskloop' directive and 2 additional clauses
'grainsize' and 'num_tasks' for this directive. Patch adds codegen for
these clauses.
These clauses are generated as arguments of the '__kmpc_taskloop'
libcall and are encoded the following way:
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
If 'grainsize' is specified, 'sched' argument must be set to '1' and
'grainsize' argument must be set to the value of the 'grainsize' clause.
If 'num_tasks' is specified, 'sched' argument must be set to '2' and
'grainsize' argument must be set to the value of the 'num_tasks' clause.
It is possible because these 2 clauses are mutually exclusive and can't
be used at the same time on the same directive.
If none of these clauses is specified, 'sched' argument must be set to
'0'.
llvm-svn: 267862
2016-04-28 17:15:06 +08:00
|
|
|
: NoSchedule),
|
2016-04-28 17:23:51 +08:00
|
|
|
Data.Schedule.getPointer()
|
|
|
|
? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
|
[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop'
directive.
OpenMP 4.5 defines 'taskloop' directive and 2 additional clauses
'grainsize' and 'num_tasks' for this directive. Patch adds codegen for
these clauses.
These clauses are generated as arguments of the '__kmpc_taskloop'
libcall and are encoded the following way:
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
If 'grainsize' is specified, 'sched' argument must be set to '1' and
'grainsize' argument must be set to the value of the 'grainsize' clause.
If 'num_tasks' is specified, 'sched' argument must be set to '2' and
'grainsize' argument must be set to the value of the 'num_tasks' clause.
It is possible because these 2 clauses are mutually exclusive and can't
be used at the same time on the same directive.
If none of these clauses is specified, 'sched' argument must be set to
'0'.
llvm-svn: 267862
2016-04-28 17:15:06 +08:00
|
|
|
/*isSigned=*/false)
|
|
|
|
: llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
|
2017-07-13 02:09:32 +08:00
|
|
|
Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
Result.TaskDupFn, CGF.VoidPtrTy)
|
|
|
|
: llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
|
|
|
|
}
|
|
|
|
|
2015-10-08 17:10:53 +08:00
|
|
|
/// \brief Emit reduction operation for each element of array (required for
|
|
|
|
/// array sections) LHS op = RHS.
|
|
|
|
/// \param Type Type of array.
|
|
|
|
/// \param LHSVar Variable on the left side of the reduction operation
|
|
|
|
/// (references element of array in original variable).
|
|
|
|
/// \param RHSVar Variable on the right side of the reduction operation
|
|
|
|
/// (references element of array in original variable).
|
|
|
|
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
|
|
|
|
/// RHSVar.
|
2015-10-28 21:54:16 +08:00
|
|
|
static void EmitOMPAggregateReduction(
|
2015-10-08 17:10:53 +08:00
|
|
|
CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
|
|
|
|
const VarDecl *RHSVar,
|
|
|
|
const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
|
|
|
|
const Expr *, const Expr *)> &RedOpGen,
|
|
|
|
const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
|
|
|
|
const Expr *UpExpr = nullptr) {
|
|
|
|
// Perform element-by-element initialization.
|
|
|
|
QualType ElementTy;
|
|
|
|
Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
|
|
|
|
Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
|
|
|
|
|
|
|
|
// Drill down to the base element type on both arrays.
|
|
|
|
auto ArrayTy = Type->getAsArrayTypeUnsafe();
|
|
|
|
auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
|
|
|
|
|
|
|
|
auto RHSBegin = RHSAddr.getPointer();
|
|
|
|
auto LHSBegin = LHSAddr.getPointer();
|
|
|
|
// Cast from pointer to array type to pointer to single element.
|
|
|
|
auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
|
|
|
|
// The basic structure here is a while-do loop.
|
|
|
|
auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
|
|
|
|
auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
|
|
|
|
auto IsEmpty =
|
|
|
|
CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
|
|
|
|
CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
|
|
|
|
|
|
|
|
// Enter the loop body, making that address the current address.
|
|
|
|
auto EntryBB = CGF.Builder.GetInsertBlock();
|
|
|
|
CGF.EmitBlock(BodyBB);
|
|
|
|
|
|
|
|
CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
|
|
|
|
|
|
|
|
llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
|
|
|
|
RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
|
|
|
|
RHSElementPHI->addIncoming(RHSBegin, EntryBB);
|
|
|
|
Address RHSElementCurrent =
|
|
|
|
Address(RHSElementPHI,
|
|
|
|
RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
|
|
|
|
|
|
|
|
llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
|
|
|
|
LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
|
|
|
|
LHSElementPHI->addIncoming(LHSBegin, EntryBB);
|
|
|
|
Address LHSElementCurrent =
|
|
|
|
Address(LHSElementPHI,
|
|
|
|
LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
|
|
|
|
|
|
|
|
// Emit copy.
|
|
|
|
CodeGenFunction::OMPPrivateScope Scope(CGF);
|
|
|
|
Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
|
|
|
|
Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
|
|
|
|
Scope.Privatize();
|
|
|
|
RedOpGen(CGF, XExpr, EExpr, UpExpr);
|
|
|
|
Scope.ForceCleanup();
|
|
|
|
|
|
|
|
// Shift the address forward by one element.
|
|
|
|
auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
|
|
|
|
LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
|
|
|
|
auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
|
|
|
|
RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
|
|
|
|
// Check whether we've reached the end.
|
|
|
|
auto Done =
|
|
|
|
CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
|
|
|
|
CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
|
|
|
|
LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
|
|
|
|
RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
|
|
|
|
|
|
|
|
// Done.
|
|
|
|
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
|
|
|
|
}
|
|
|
|
|
2016-03-17 18:19:46 +08:00
|
|
|
/// Emit reduction combiner. If the combiner is a simple expression emit it as
|
|
|
|
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
|
|
|
|
/// UDR combiner function.
|
|
|
|
static void emitReductionCombiner(CodeGenFunction &CGF,
|
|
|
|
const Expr *ReductionOp) {
|
|
|
|
if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
|
|
|
|
if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
|
|
|
|
if (auto *DRE =
|
|
|
|
dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
|
|
|
|
if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
|
|
|
|
std::pair<llvm::Function *, llvm::Function *> Reduction =
|
|
|
|
CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
|
|
|
|
RValue Func = RValue::get(Reduction.first);
|
|
|
|
CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
|
|
|
|
CGF.EmitIgnoredExpr(ReductionOp);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
CGF.EmitIgnoredExpr(ReductionOp);
|
|
|
|
}
|
|
|
|
|
2017-02-17 00:20:16 +08:00
|
|
|
llvm::Value *CGOpenMPRuntime::emitReductionFunction(
|
|
|
|
CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
|
|
|
|
ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
|
|
|
|
ArrayRef<const Expr *> ReductionOps) {
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
auto &C = CGM.getContext();
|
|
|
|
|
|
|
|
// void reduction_func(void *LHSArg, void *RHSArg);
|
|
|
|
FunctionArgList Args;
|
2017-06-09 21:40:18 +08:00
|
|
|
ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
|
|
|
|
ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
Args.push_back(&LHSArg);
|
|
|
|
Args.push_back(&RHSArg);
|
2016-03-11 12:30:31 +08:00
|
|
|
auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
auto *Fn = llvm::Function::Create(
|
|
|
|
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
|
|
|
|
".omp.reduction.reduction_func", &CGM.getModule());
|
2015-10-28 10:30:47 +08:00
|
|
|
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
|
|
|
|
|
|
|
|
// Dst = (void*[n])(LHSArg);
|
|
|
|
// Src = (void*[n])(RHSArg);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
|
|
|
|
ArgsType), CGF.getPointerAlign());
|
|
|
|
Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
|
|
|
|
ArgsType), CGF.getPointerAlign());
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
|
|
|
|
// ...
|
|
|
|
// *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
|
|
|
|
// ...
|
|
|
|
CodeGenFunction::OMPPrivateScope Scope(CGF);
|
2015-10-08 17:10:53 +08:00
|
|
|
auto IPriv = Privates.begin();
|
|
|
|
unsigned Idx = 0;
|
|
|
|
for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
|
|
|
|
Scope.addPrivate(RHSVar, [&]() -> Address {
|
2015-10-08 17:10:53 +08:00
|
|
|
return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
});
|
|
|
|
auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
|
|
|
|
Scope.addPrivate(LHSVar, [&]() -> Address {
|
2015-10-08 17:10:53 +08:00
|
|
|
return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
});
|
2015-10-08 17:10:53 +08:00
|
|
|
QualType PrivTy = (*IPriv)->getType();
|
2016-01-26 20:20:39 +08:00
|
|
|
if (PrivTy->isVariablyModifiedType()) {
|
2015-10-08 17:10:53 +08:00
|
|
|
// Get array size and emit VLA type.
|
|
|
|
++Idx;
|
|
|
|
Address Elem =
|
|
|
|
CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
|
|
|
|
llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
|
2016-01-26 20:20:39 +08:00
|
|
|
auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
|
|
|
|
auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
|
2015-10-08 17:10:53 +08:00
|
|
|
CodeGenFunction::OpaqueValueMapping OpaqueMap(
|
2016-01-26 20:20:39 +08:00
|
|
|
CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
|
2015-10-08 17:10:53 +08:00
|
|
|
CGF.EmitVariablyModifiedType(PrivTy);
|
|
|
|
}
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
|
|
|
Scope.Privatize();
|
2015-10-08 17:10:53 +08:00
|
|
|
IPriv = Privates.begin();
|
|
|
|
auto ILHS = LHSExprs.begin();
|
|
|
|
auto IRHS = RHSExprs.begin();
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
for (auto *E : ReductionOps) {
|
2015-10-08 17:10:53 +08:00
|
|
|
if ((*IPriv)->getType()->isArrayType()) {
|
|
|
|
// Emit reduction for array section.
|
|
|
|
auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
|
|
|
|
auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
|
2016-03-17 18:19:46 +08:00
|
|
|
EmitOMPAggregateReduction(
|
|
|
|
CGF, (*IPriv)->getType(), LHSVar, RHSVar,
|
|
|
|
[=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
|
|
|
|
emitReductionCombiner(CGF, E);
|
|
|
|
});
|
2015-10-08 17:10:53 +08:00
|
|
|
} else
|
|
|
|
// Emit reduction for array subscript or single variable.
|
2016-03-17 18:19:46 +08:00
|
|
|
emitReductionCombiner(CGF, E);
|
2016-02-19 06:34:54 +08:00
|
|
|
++IPriv;
|
|
|
|
++ILHS;
|
|
|
|
++IRHS;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
|
|
|
Scope.ForceCleanup();
|
|
|
|
CGF.FinishFunction();
|
|
|
|
return Fn;
|
|
|
|
}
|
|
|
|
|
2017-02-17 00:20:16 +08:00
|
|
|
/// Emit the combiner code for a single reduction item.
///
/// \param CGF         Code generation state the combiner is emitted into.
/// \param ReductionOp Expression passed to emitReductionCombiner.
/// \param PrivateRef  Reference to the private copy; only its type is
///                    inspected here to pick the scalar or the array path.
/// \param LHS         Reference whose declaration is the left-hand variable
///                    handed to the aggregate reduction (array path only).
/// \param RHS         Reference whose declaration is the right-hand variable
///                    handed to the aggregate reduction (array path only).
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  const QualType PrivateTy = PrivateRef->getType();

  // Array subscript or single variable: the combiner is emitted directly.
  if (!PrivateTy->isArrayType()) {
    emitReductionCombiner(CGF, ReductionOp);
    return;
  }

  // Array section: hand both variables to the aggregate reduction helper and
  // let it invoke the combiner through the callback.
  const auto *LeftVD = cast<VarDecl>(LHS->getDecl());
  const auto *RightVD = cast<VarDecl>(RHS->getDecl());
  auto &&CombinerGen = [=](CodeGenFunction &ElemCGF, const Expr *,
                           const Expr *, const Expr *) {
    emitReductionCombiner(ElemCGF, ReductionOp);
  };
  EmitOMPAggregateReduction(CGF, PrivateTy, LeftVD, RightVD, CombinerGen);
}
|
|
|
|
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
|
2015-10-08 17:10:53 +08:00
|
|
|
ArrayRef<const Expr *> Privates,
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
ArrayRef<const Expr *> LHSExprs,
|
|
|
|
ArrayRef<const Expr *> RHSExprs,
|
|
|
|
ArrayRef<const Expr *> ReductionOps,
|
2017-02-17 00:20:16 +08:00
|
|
|
ReductionOptionsTy Options) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2017-02-17 00:20:16 +08:00
|
|
|
|
|
|
|
bool WithNowait = Options.WithNowait;
|
|
|
|
bool SimpleReduction = Options.SimpleReduction;
|
|
|
|
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
// Next code should be emitted for reduction:
|
|
|
|
//
|
|
|
|
// static kmp_critical_name lock = { 0 };
|
|
|
|
//
|
|
|
|
// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
|
|
|
|
// *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
|
|
|
|
// ...
|
|
|
|
// *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
|
|
|
|
// *(Type<n>-1*)rhs[<n>-1]);
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// ...
|
|
|
|
// void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
|
|
|
|
// switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
|
|
|
|
// RedList, reduce_func, &<lock>)) {
|
|
|
|
// case 1:
|
|
|
|
// ...
|
|
|
|
// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
|
|
|
|
// ...
|
|
|
|
// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
|
|
|
|
// break;
|
|
|
|
// case 2:
|
|
|
|
// ...
|
|
|
|
// Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
|
|
|
|
// ...
|
2015-05-07 11:54:03 +08:00
|
|
|
// [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
// break;
|
|
|
|
// default:;
|
|
|
|
// }
|
2015-06-17 14:21:39 +08:00
|
|
|
//
|
|
|
|
// if SimpleReduction is true, only the next code is generated:
|
|
|
|
// ...
|
|
|
|
// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
|
|
|
|
// ...
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
|
|
|
|
auto &C = CGM.getContext();
|
|
|
|
|
2015-06-17 14:21:39 +08:00
|
|
|
if (SimpleReduction) {
|
|
|
|
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
2015-10-08 17:10:53 +08:00
|
|
|
auto IPriv = Privates.begin();
|
|
|
|
auto ILHS = LHSExprs.begin();
|
|
|
|
auto IRHS = RHSExprs.begin();
|
2015-06-17 14:21:39 +08:00
|
|
|
for (auto *E : ReductionOps) {
|
2016-03-29 13:34:15 +08:00
|
|
|
emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
|
|
|
|
cast<DeclRefExpr>(*IRHS));
|
2016-02-19 06:34:54 +08:00
|
|
|
++IPriv;
|
|
|
|
++ILHS;
|
|
|
|
++IRHS;
|
2015-06-17 14:21:39 +08:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
// 1. Build a list of reduction variables.
|
|
|
|
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
|
2015-10-08 17:10:53 +08:00
|
|
|
auto Size = RHSExprs.size();
|
|
|
|
for (auto *E : Privates) {
|
2016-01-26 20:20:39 +08:00
|
|
|
if (E->getType()->isVariablyModifiedType())
|
2015-10-08 17:10:53 +08:00
|
|
|
// Reserve place for array size.
|
|
|
|
++Size;
|
|
|
|
}
|
|
|
|
llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
QualType ReductionArrayTy =
|
|
|
|
C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
|
|
|
|
/*IndexTypeQuals=*/0);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address ReductionList =
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
|
2015-10-08 17:10:53 +08:00
|
|
|
auto IPriv = Privates.begin();
|
|
|
|
unsigned Idx = 0;
|
|
|
|
for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address Elem =
|
2015-10-08 17:10:53 +08:00
|
|
|
CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.Builder.CreateStore(
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
|
|
|
|
Elem);
|
2016-01-26 20:20:39 +08:00
|
|
|
if ((*IPriv)->getType()->isVariablyModifiedType()) {
|
2015-10-08 17:10:53 +08:00
|
|
|
// Store array size.
|
|
|
|
++Idx;
|
|
|
|
Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
|
|
|
|
CGF.getPointerSize());
|
2016-01-26 20:20:39 +08:00
|
|
|
llvm::Value *Size = CGF.Builder.CreateIntCast(
|
|
|
|
CGF.getVLASize(
|
|
|
|
CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
|
|
|
|
.first,
|
|
|
|
CGF.SizeTy, /*isSigned=*/false);
|
|
|
|
CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
|
|
|
|
Elem);
|
2015-10-08 17:10:53 +08:00
|
|
|
}
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// 2. Emit reduce_func().
|
|
|
|
auto *ReductionFn = emitReductionFunction(
|
2015-10-08 17:10:53 +08:00
|
|
|
CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
|
|
|
|
LHSExprs, RHSExprs, ReductionOps);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
|
|
|
|
// 3. Create static kmp_critical_name lock = { 0 };
|
|
|
|
auto *Lock = getCriticalRegionLock(".reduction");
|
|
|
|
|
|
|
|
// 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
|
|
|
|
// RedList, reduce_func, &<lock>);
|
2016-02-19 18:38:26 +08:00
|
|
|
auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
auto *ThreadId = getThreadID(CGF, Loc);
|
2016-01-26 20:20:39 +08:00
|
|
|
auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
|
2016-12-13 02:00:20 +08:00
|
|
|
auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
ReductionList.getPointer(), CGF.VoidPtrTy);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
llvm::Value *Args[] = {
|
|
|
|
IdentTLoc, // ident_t *<loc>
|
|
|
|
ThreadId, // i32 <gtid>
|
|
|
|
CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
|
|
|
|
ReductionArrayTySize, // size_type sizeof(RedList)
|
|
|
|
RL, // void *RedList
|
|
|
|
ReductionFn, // void (*) (void *, void *) <reduce_func>
|
|
|
|
Lock // kmp_critical_name *&<lock>
|
|
|
|
};
|
|
|
|
auto Res = CGF.EmitRuntimeCall(
|
|
|
|
createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
|
|
|
|
: OMPRTL__kmpc_reduce),
|
|
|
|
Args);
|
|
|
|
|
|
|
|
// 5. Build switch(res)
|
|
|
|
auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
|
|
|
|
auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
|
|
|
|
|
|
|
|
// 6. Build case 1:
|
|
|
|
// ...
|
|
|
|
// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
|
|
|
|
// ...
|
|
|
|
// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
|
|
|
|
// break;
|
|
|
|
auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
|
|
|
|
SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
|
|
|
|
CGF.EmitBlock(Case1BB);
|
|
|
|
|
2016-03-29 13:34:15 +08:00
|
|
|
// Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
|
|
|
|
llvm::Value *EndArgs[] = {
|
|
|
|
IdentTLoc, // ident_t *<loc>
|
|
|
|
ThreadId, // i32 <gtid>
|
|
|
|
Lock // kmp_critical_name *&<lock>
|
|
|
|
};
|
|
|
|
auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
|
|
|
|
CodeGenFunction &CGF, PrePostActionTy &Action) {
|
2017-02-17 00:20:16 +08:00
|
|
|
auto &RT = CGF.CGM.getOpenMPRuntime();
|
2015-10-08 17:10:53 +08:00
|
|
|
auto IPriv = Privates.begin();
|
|
|
|
auto ILHS = LHSExprs.begin();
|
|
|
|
auto IRHS = RHSExprs.begin();
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
for (auto *E : ReductionOps) {
|
2017-02-17 00:20:16 +08:00
|
|
|
RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
|
|
|
|
cast<DeclRefExpr>(*IRHS));
|
2016-02-19 06:34:54 +08:00
|
|
|
++IPriv;
|
|
|
|
++ILHS;
|
|
|
|
++IRHS;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
2016-03-29 13:34:15 +08:00
|
|
|
};
|
|
|
|
RegionCodeGenTy RCG(CodeGen);
|
|
|
|
CommonActionTy Action(
|
|
|
|
nullptr, llvm::None,
|
|
|
|
createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
|
|
|
|
: OMPRTL__kmpc_end_reduce),
|
|
|
|
EndArgs);
|
|
|
|
RCG.setAction(Action);
|
|
|
|
RCG(CGF);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
|
|
|
|
CGF.EmitBranch(DefaultBB);
|
|
|
|
|
|
|
|
// 7. Build case 2:
|
|
|
|
// ...
|
|
|
|
// Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
|
|
|
|
// ...
|
|
|
|
// break;
|
|
|
|
auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
|
|
|
|
SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
|
|
|
|
CGF.EmitBlock(Case2BB);
|
|
|
|
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
|
|
|
|
CodeGenFunction &CGF, PrePostActionTy &Action) {
|
2015-10-08 17:10:53 +08:00
|
|
|
auto ILHS = LHSExprs.begin();
|
|
|
|
auto IRHS = RHSExprs.begin();
|
|
|
|
auto IPriv = Privates.begin();
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
for (auto *E : ReductionOps) {
|
2016-03-29 13:34:15 +08:00
|
|
|
const Expr *XExpr = nullptr;
|
|
|
|
const Expr *EExpr = nullptr;
|
|
|
|
const Expr *UpExpr = nullptr;
|
|
|
|
BinaryOperatorKind BO = BO_Comma;
|
|
|
|
if (auto *BO = dyn_cast<BinaryOperator>(E)) {
|
|
|
|
if (BO->getOpcode() == BO_Assign) {
|
|
|
|
XExpr = BO->getLHS();
|
|
|
|
UpExpr = BO->getRHS();
|
2015-05-07 11:54:03 +08:00
|
|
|
}
|
2016-03-29 13:34:15 +08:00
|
|
|
}
|
|
|
|
// Try to emit update expression as a simple atomic.
|
|
|
|
auto *RHSExpr = UpExpr;
|
|
|
|
if (RHSExpr) {
|
|
|
|
// Analyze RHS part of the whole expression.
|
|
|
|
if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
|
|
|
|
RHSExpr->IgnoreParenImpCasts())) {
|
|
|
|
// If this is a conditional operator, analyze its condition for
|
|
|
|
// min/max reduction operator.
|
|
|
|
RHSExpr = ACO->getCond();
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
2016-03-29 13:34:15 +08:00
|
|
|
if (auto *BORHS =
|
|
|
|
dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
|
|
|
|
EExpr = BORHS->getRHS();
|
|
|
|
BO = BORHS->getOpcode();
|
2015-10-08 17:10:53 +08:00
|
|
|
}
|
2016-03-29 13:34:15 +08:00
|
|
|
}
|
|
|
|
if (XExpr) {
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
|
2017-01-14 02:55:32 +08:00
|
|
|
auto &&AtomicRedGen = [BO, VD,
|
2016-03-29 13:34:15 +08:00
|
|
|
Loc](CodeGenFunction &CGF, const Expr *XExpr,
|
|
|
|
const Expr *EExpr, const Expr *UpExpr) {
|
|
|
|
LValue X = CGF.EmitLValue(XExpr);
|
|
|
|
RValue E;
|
|
|
|
if (EExpr)
|
|
|
|
E = CGF.EmitAnyExpr(EExpr);
|
|
|
|
CGF.EmitOMPAtomicSimpleUpdateExpr(
|
2016-04-07 01:26:42 +08:00
|
|
|
X, E, BO, /*IsXLHSInRHSPart=*/true,
|
|
|
|
llvm::AtomicOrdering::Monotonic, Loc,
|
2017-01-14 02:55:32 +08:00
|
|
|
[&CGF, UpExpr, VD, Loc](RValue XRValue) {
|
2016-03-29 13:34:15 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
|
|
|
|
PrivateScope.addPrivate(
|
|
|
|
VD, [&CGF, VD, XRValue, Loc]() -> Address {
|
|
|
|
Address LHSTemp = CGF.CreateMemTemp(VD->getType());
|
|
|
|
CGF.emitOMPSimpleStore(
|
|
|
|
CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
|
|
|
|
VD->getType().getNonReferenceType(), Loc);
|
|
|
|
return LHSTemp;
|
|
|
|
});
|
|
|
|
(void)PrivateScope.Privatize();
|
|
|
|
return CGF.EmitAnyExpr(UpExpr);
|
|
|
|
});
|
|
|
|
};
|
|
|
|
if ((*IPriv)->getType()->isArrayType()) {
|
|
|
|
// Emit atomic reduction for array section.
|
|
|
|
auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
|
|
|
|
EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
|
|
|
|
AtomicRedGen, XExpr, EExpr, UpExpr);
|
|
|
|
} else
|
|
|
|
// Emit atomic reduction for array subscript or single variable.
|
|
|
|
AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
|
|
|
|
} else {
|
|
|
|
// Emit as a critical region.
|
|
|
|
auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
|
|
|
|
const Expr *, const Expr *) {
|
|
|
|
auto &RT = CGF.CGM.getOpenMPRuntime();
|
|
|
|
RT.emitCriticalRegion(
|
|
|
|
CGF, ".atomic_reduction",
|
|
|
|
[=](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
|
|
|
emitReductionCombiner(CGF, E);
|
|
|
|
},
|
|
|
|
Loc);
|
|
|
|
};
|
|
|
|
if ((*IPriv)->getType()->isArrayType()) {
|
|
|
|
auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
|
|
|
|
auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
|
|
|
|
EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
|
|
|
|
CritRedGen);
|
|
|
|
} else
|
|
|
|
CritRedGen(CGF, nullptr, nullptr, nullptr);
|
|
|
|
}
|
2016-02-19 06:34:54 +08:00
|
|
|
++ILHS;
|
|
|
|
++IRHS;
|
|
|
|
++IPriv;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
2016-03-29 13:34:15 +08:00
|
|
|
};
|
|
|
|
RegionCodeGenTy AtomicRCG(AtomicCodeGen);
|
|
|
|
if (!WithNowait) {
|
|
|
|
// Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
|
|
|
|
llvm::Value *EndArgs[] = {
|
|
|
|
IdentTLoc, // ident_t *<loc>
|
|
|
|
ThreadId, // i32 <gtid>
|
|
|
|
Lock // kmp_critical_name *&<lock>
|
|
|
|
};
|
|
|
|
CommonActionTy Action(nullptr, llvm::None,
|
|
|
|
createRuntimeFunction(OMPRTL__kmpc_end_reduce),
|
|
|
|
EndArgs);
|
|
|
|
AtomicRCG.setAction(Action);
|
|
|
|
AtomicRCG(CGF);
|
|
|
|
} else
|
|
|
|
AtomicRCG(CGF);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
|
|
|
|
CGF.EmitBranch(DefaultBB);
|
|
|
|
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
|
|
|
|
}
|
|
|
|
|
2017-07-17 21:30:36 +08:00
|
|
|
/// Generates unique name for artificial threadprivate variables.
|
|
|
|
/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N>
|
|
|
|
static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc,
|
|
|
|
unsigned N) {
|
|
|
|
SmallString<256> Buffer;
|
|
|
|
llvm::raw_svector_ostream Out(Buffer);
|
|
|
|
Out << Prefix << "." << Loc.getRawEncoding() << "_" << N;
|
|
|
|
return Out.str();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emits reduction initializer function:
|
|
|
|
/// \code
|
|
|
|
/// void @.red_init(void* %arg) {
|
|
|
|
/// %0 = bitcast void* %arg to <type>*
|
|
|
|
/// store <type> <init>, <type>* %0
|
|
|
|
/// ret void
|
|
|
|
/// }
|
|
|
|
/// \endcode
|
|
|
|
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
|
|
|
|
SourceLocation Loc,
|
|
|
|
ReductionCodeGen &RCG, unsigned N) {
|
|
|
|
auto &C = CGM.getContext();
|
|
|
|
FunctionArgList Args;
|
|
|
|
ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
|
|
|
|
Args.emplace_back(&Param);
|
|
|
|
auto &FnInfo =
|
|
|
|
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
|
|
|
|
auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
|
|
|
|
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
|
|
|
|
".red_init.", &CGM.getModule());
|
|
|
|
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
|
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
CGF.disableDebugInfo();
|
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
|
|
|
|
Address PrivateAddr = CGF.EmitLoadOfPointer(
|
|
|
|
CGF.GetAddrOfLocalVar(&Param),
|
|
|
|
C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
|
|
|
|
llvm::Value *Size = nullptr;
|
|
|
|
// If the size of the reduction item is non-constant, load it from global
|
|
|
|
// threadprivate variable.
|
|
|
|
if (RCG.getSizes(N).second) {
|
|
|
|
Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
|
|
|
|
CGF, CGM.getContext().getSizeType(),
|
|
|
|
generateUniqueName("reduction_size", Loc, N));
|
|
|
|
Size =
|
|
|
|
CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
|
|
|
|
CGM.getContext().getSizeType(), SourceLocation());
|
|
|
|
}
|
|
|
|
RCG.emitAggregateType(CGF, N, Size);
|
|
|
|
LValue SharedLVal;
|
|
|
|
// If initializer uses initializer from declare reduction construct, emit a
|
|
|
|
// pointer to the address of the original reduction item (reuired by reduction
|
|
|
|
// initializer)
|
|
|
|
if (RCG.usesReductionInitializer(N)) {
|
|
|
|
Address SharedAddr =
|
|
|
|
CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
|
|
|
|
CGF, CGM.getContext().VoidPtrTy,
|
|
|
|
generateUniqueName("reduction", Loc, N));
|
|
|
|
SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
|
|
|
|
} else {
|
|
|
|
SharedLVal = CGF.MakeNaturalAlignAddrLValue(
|
|
|
|
llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
|
|
|
|
CGM.getContext().VoidPtrTy);
|
|
|
|
}
|
|
|
|
// Emit the initializer:
|
|
|
|
// %0 = bitcast void* %arg to <type>*
|
|
|
|
// store <type> <init>, <type>* %0
|
|
|
|
RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
|
|
|
|
[](CodeGenFunction &) { return false; });
|
|
|
|
CGF.FinishFunction();
|
|
|
|
return Fn;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emits reduction combiner function:
|
|
|
|
/// \code
|
|
|
|
/// void @.red_comb(void* %arg0, void* %arg1) {
|
|
|
|
/// %lhs = bitcast void* %arg0 to <type>*
|
|
|
|
/// %rhs = bitcast void* %arg1 to <type>*
|
|
|
|
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
|
|
|
|
/// store <type> %2, <type>* %lhs
|
|
|
|
/// ret void
|
|
|
|
/// }
|
|
|
|
/// \endcode
|
|
|
|
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
|
|
|
|
SourceLocation Loc,
|
|
|
|
ReductionCodeGen &RCG, unsigned N,
|
|
|
|
const Expr *ReductionOp,
|
|
|
|
const Expr *LHS, const Expr *RHS,
|
|
|
|
const Expr *PrivateRef) {
|
|
|
|
auto &C = CGM.getContext();
|
|
|
|
auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
|
|
|
|
auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
|
|
|
|
FunctionArgList Args;
|
|
|
|
ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other);
|
|
|
|
ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other);
|
|
|
|
Args.emplace_back(&ParamInOut);
|
|
|
|
Args.emplace_back(&ParamIn);
|
|
|
|
auto &FnInfo =
|
|
|
|
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
|
|
|
|
auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
|
|
|
|
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
|
|
|
|
".red_comb.", &CGM.getModule());
|
|
|
|
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
|
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
CGF.disableDebugInfo();
|
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
|
|
|
|
llvm::Value *Size = nullptr;
|
|
|
|
// If the size of the reduction item is non-constant, load it from global
|
|
|
|
// threadprivate variable.
|
|
|
|
if (RCG.getSizes(N).second) {
|
|
|
|
Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
|
|
|
|
CGF, CGM.getContext().getSizeType(),
|
|
|
|
generateUniqueName("reduction_size", Loc, N));
|
|
|
|
Size =
|
|
|
|
CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
|
|
|
|
CGM.getContext().getSizeType(), SourceLocation());
|
|
|
|
}
|
|
|
|
RCG.emitAggregateType(CGF, N, Size);
|
|
|
|
// Remap lhs and rhs variables to the addresses of the function arguments.
|
|
|
|
// %lhs = bitcast void* %arg0 to <type>*
|
|
|
|
// %rhs = bitcast void* %arg1 to <type>*
|
|
|
|
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
|
|
|
|
PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address {
|
|
|
|
// Pull out the pointer to the variable.
|
|
|
|
Address PtrAddr = CGF.EmitLoadOfPointer(
|
|
|
|
CGF.GetAddrOfLocalVar(&ParamInOut),
|
|
|
|
C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
|
|
|
|
return CGF.Builder.CreateElementBitCast(
|
|
|
|
PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
|
|
|
|
});
|
|
|
|
PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address {
|
|
|
|
// Pull out the pointer to the variable.
|
|
|
|
Address PtrAddr = CGF.EmitLoadOfPointer(
|
|
|
|
CGF.GetAddrOfLocalVar(&ParamIn),
|
|
|
|
C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
|
|
|
|
return CGF.Builder.CreateElementBitCast(
|
|
|
|
PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
|
|
|
|
});
|
|
|
|
PrivateScope.Privatize();
|
|
|
|
// Emit the combiner body:
|
|
|
|
// %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
|
|
|
|
// store <type> %2, <type>* %lhs
|
|
|
|
CGM.getOpenMPRuntime().emitSingleReductionCombiner(
|
|
|
|
CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
|
|
|
|
cast<DeclRefExpr>(RHS));
|
|
|
|
CGF.FinishFunction();
|
|
|
|
return Fn;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emits reduction finalizer function:
|
|
|
|
/// \code
|
|
|
|
/// void @.red_fini(void* %arg) {
|
|
|
|
/// %0 = bitcast void* %arg to <type>*
|
|
|
|
/// <destroy>(<type>* %0)
|
|
|
|
/// ret void
|
|
|
|
/// }
|
|
|
|
/// \endcode
|
|
|
|
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
|
|
|
|
SourceLocation Loc,
|
|
|
|
ReductionCodeGen &RCG, unsigned N) {
|
|
|
|
if (!RCG.needCleanups(N))
|
|
|
|
return nullptr;
|
|
|
|
auto &C = CGM.getContext();
|
|
|
|
FunctionArgList Args;
|
|
|
|
ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
|
|
|
|
Args.emplace_back(&Param);
|
|
|
|
auto &FnInfo =
|
|
|
|
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
|
|
|
|
auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
|
|
|
|
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
|
|
|
|
".red_fini.", &CGM.getModule());
|
|
|
|
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
|
|
|
|
CodeGenFunction CGF(CGM);
|
|
|
|
CGF.disableDebugInfo();
|
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
|
|
|
|
Address PrivateAddr = CGF.EmitLoadOfPointer(
|
|
|
|
CGF.GetAddrOfLocalVar(&Param),
|
|
|
|
C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
|
|
|
|
llvm::Value *Size = nullptr;
|
|
|
|
// If the size of the reduction item is non-constant, load it from global
|
|
|
|
// threadprivate variable.
|
|
|
|
if (RCG.getSizes(N).second) {
|
|
|
|
Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
|
|
|
|
CGF, CGM.getContext().getSizeType(),
|
|
|
|
generateUniqueName("reduction_size", Loc, N));
|
|
|
|
Size =
|
|
|
|
CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
|
|
|
|
CGM.getContext().getSizeType(), SourceLocation());
|
|
|
|
}
|
|
|
|
RCG.emitAggregateType(CGF, N, Size);
|
|
|
|
// Emit the finalizer body:
|
|
|
|
// <destroy>(<type>* %0)
|
|
|
|
RCG.emitCleanups(CGF, N, PrivateAddr);
|
|
|
|
CGF.FinishFunction();
|
|
|
|
return Fn;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Fills a temporary array of kmp_task_red_input_t records, one per entry of
/// Data.ReductionVars, and emits a call to __kmpc_task_reduction_init with
/// it. Returns the result of that runtime call, or nullptr when there is no
/// insertion point or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &Ctx = CGM.getContext();
  RecordDecl *InputRD = Ctx.buildImplicitRecord("kmp_task_red_input_t");
  InputRD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(Ctx, InputRD, Ctx.VoidPtrTy);
  const FieldDecl *SizeFD =
      addFieldToRecordDecl(Ctx, InputRD, Ctx.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(Ctx, InputRD, Ctx.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(Ctx, InputRD, Ctx.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(Ctx, InputRD, Ctx.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      Ctx, InputRD,
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  InputRD->completeDefinition();
  QualType InputTy = Ctx.getRecordType(InputRD);

  unsigned NumItems = Data.ReductionVars.size();
  llvm::APInt NumItemsAP(/*numBits=*/64, NumItems);
  QualType InputArrayTy = Ctx.getConstantArrayType(
      InputTy, NumItemsAP, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[NumItems];
  Address TaskRedInput = CGF.CreateMemTemp(InputArrayTy, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);

  for (unsigned I = 0; I < NumItems; ++I) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[I];
    llvm::Value *GEPIndices[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                                 llvm::ConstantInt::get(CGM.SizeTy, I)};
    llvm::Value *ElemPtr = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), GEPIndices,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(ElemPtr, InputTy);

    // ElemLVal.reduce_shar = &Shareds[I];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, I);
    llvm::Value *SharedAsVoidPtr =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(I).getPointer());
    CGF.EmitStoreOfScalar(SharedAsVoidPtr, SharedLVal);

    RCG.emitAggregateType(CGF, I);
    auto ItemSizes = RCG.getSizes(I);
    llvm::Value *SizeInChars = ItemSizes.first;
    llvm::Value *DynSize = ItemSizes.second;
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool NeedsDelayedCreation = DynSize != nullptr;

    // ElemLVal.reduce_size = size of the item in chars, as size_t.
    SizeInChars = CGF.Builder.CreateIntCast(SizeInChars, CGM.SizeTy,
                                            /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeInChars, SizeLVal);

    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitFn = emitReduceInitFunction(CGM, Loc, RCG, I);
    llvm::Value *InitAddr = CGF.EmitCastToVoidPtr(InitFn);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    if (!NeedsDelayedCreation)
      NeedsDelayedCreation = RCG.usesReductionInitializer(I);

    // ElemLVal.reduce_fini = fini; (null when no finalizer was generated)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *FiniFn = emitReduceFiniFunction(CGM, Loc, RCG, I);
    llvm::Value *FiniAddr;
    if (FiniFn)
      FiniAddr = CGF.EmitCastToVoidPtr(FiniFn);
    else
      FiniAddr = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);

    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombFn = emitReduceCombFunction(
        CGM, Loc, RCG, I, Data.ReductionOps[I], LHSExprs[I], RHSExprs[I],
        Data.ReductionCopies[I]);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(CombFn);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);

    // ElemLVal.flags = 1 when delayed creation is needed, 0 otherwise.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (NeedsDelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else {
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
    }
  }

  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *NumData =
      llvm::ConstantInt::get(CGM.IntTy, NumItems, /*isSigned=*/true);
  llvm::Value *DataPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      TaskRedInput.getPointer(), CGM.VoidPtrTy);
  llvm::Value *Args[] = {GTid, NumData, DataPtr};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
|
|
|
|
|
|
|
|
/// Store the per-item values that the generated reduction helper functions
/// read back through artificial threadprivate variables.
///
/// \param CGF Function into which the stores are emitted.
/// \param Loc Source location; also used to build the unique variable names.
/// \param RCG Reduction codegen helper that owns the reduction items.
/// \param N   Index of the reduction item being processed.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto &Bld = CGF.Builder;

  // A non-null second size means the item's type is non-constant, so its size
  // has to be published through a threadprivate "reduction_size" variable.
  if (llvm::Value *DynSize = RCG.getSizes(N).second) {
    llvm::Value *SizeVal =
        Bld.CreateIntCast(DynSize, CGM.SizeTy, /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName("reduction_size", Loc, N));
    Bld.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }

  // Only a custom initializer needs the address of the original item.
  if (!RCG.usesReductionInitializer(N))
    return;

  Address SharedAddr = getAddrOfArtificialThreadPrivate(
      CGF, CGM.getContext().VoidPtrTy,
      generateUniqueName("reduction", Loc, N));
  llvm::Value *SharedPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
      RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy);
  Bld.CreateStore(SharedPtr, SharedAddr, /*IsVolatile=*/false);
}
|
|
|
|
|
|
|
|
/// Emit a call to __kmpc_task_reduction_get_th_data and wrap its result in an
/// Address.
///
/// \param CGF           Function into which the call is emitted.
/// \param Loc           Source location used to obtain the thread id.
/// \param ReductionsPtr Passed unchanged as the second runtime argument.
/// \param SharedLVal    Shared item; its pointer is passed (as void *) as the
///                      third runtime argument and its alignment is reused for
///                      the returned Address.
/// \returns Address built from the runtime call result.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *SharedPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      SharedLVal.getPointer(), CGM.VoidPtrTy);
  llvm::Value *Args[] = {GTid, ReductionsPtr, SharedPtr};

  llvm::Value *Item = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args);
  return Address(Item, SharedLVal.getAlignment());
}
|
|
|
|
|
2015-04-27 13:22:09 +08:00
|
|
|
/// Emit a call to __kmpc_omp_taskwait followed, when inside an OpenMP region,
/// by that region's untied-task switch.
///
/// Does nothing when \p CGF has no insertion point.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  // kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
  llvm::Value *UpdateLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *Args[] = {UpdateLoc, ThreadID};
  // The value returned by the runtime is deliberately not used here.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);

  auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (Region)
    Region->emitUntiedSwitch(CGF);
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
|
2015-07-03 17:56:58 +08:00
|
|
|
OpenMPDirectiveKind InnerKind,
|
2015-09-15 20:52:43 +08:00
|
|
|
const RegionCodeGenTy &CodeGen,
|
|
|
|
bool HasCancel) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-09-15 20:52:43 +08:00
|
|
|
InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
|
2015-04-10 12:50:10 +08:00
|
|
|
CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
|
2015-02-26 18:27:34 +08:00
|
|
|
}
|
|
|
|
|
2015-07-06 13:50:32 +08:00
|
|
|
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls emitted below.
enum RTCancelKind {
  /// Initial value used before a region kind has been selected.
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
|
2015-07-06 13:50:32 +08:00
|
|
|
|
|
|
|
/// Map the directive kind named in a cancel / cancellation point construct to
/// the corresponding RTCancelKind value.
///
/// Any kind other than parallel, for and sections is expected to be taskgroup
/// (checked by an assertion) and yields CancelTaskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  switch (CancelRegion) {
  case OMPD_parallel:
    return CancelParallel;
  case OMPD_for:
    return CancelLoop;
  case OMPD_sections:
    return CancelSections;
  default:
    assert(CancelRegion == OMPD_taskgroup);
    return CancelTaskgroup;
  }
}
|
|
|
|
|
|
|
|
/// Emit a call to __kmpc_cancellationpoint and a conditional branch out of
/// the enclosing construct when the runtime returns a non-null value.
///
/// Nothing is emitted when there is no insertion point, when the current
/// captured-statement info is not an OpenMP region, or when the region has no
/// cancel and \p CancelRegion is not a taskgroup.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;

  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (!OMPRegionInfo)
    return;

  // For 'cancellation point taskgroup', the task region info may not have a
  // cancel; the cancel may instead happen in another adjacent task.
  if (CancelRegion != OMPD_taskgroup && !OMPRegionInfo->hasCancel())
    return;

  // kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 global_tid,
  //                                    kmp_int32 cncl_kind);
  llvm::Value *UpdateLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *Kind = CGF.Builder.getInt32(getCancellationKind(CancelRegion));
  llvm::Value *Args[] = {UpdateLoc, ThreadID, Kind};
  auto *Result = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);

  // if (__kmpc_cancellationpoint()) {
  //   exit from construct;
  // }
  auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
  auto *ContBB = CGF.createBasicBlock(".cancel.continue");
  CGF.Builder.CreateCondBr(CGF.Builder.CreateIsNotNull(Result), ExitBB,
                           ContBB);

  // Exit path: leave the construct through its cancel destination.
  CGF.EmitBlock(ExitBB);
  CGF.EmitBranchThroughCleanup(
      CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()));

  // Fall-through path.
  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
}
|
|
|
|
|
2015-07-06 13:50:32 +08:00
|
|
|
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
|
2015-09-18 16:07:34 +08:00
|
|
|
const Expr *IfCond,
|
2015-07-06 13:50:32 +08:00
|
|
|
OpenMPDirectiveKind CancelRegion) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-07-06 13:50:32 +08:00
|
|
|
// Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
|
|
|
|
// kmp_int32 cncl_kind);
|
|
|
|
if (auto *OMPRegionInfo =
|
|
|
|
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
|
|
|
|
PrePostActionTy &) {
|
|
|
|
auto &RT = CGF.CGM.getOpenMPRuntime();
|
2015-09-18 16:07:34 +08:00
|
|
|
llvm::Value *Args[] = {
|
2016-03-29 13:34:15 +08:00
|
|
|
RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
|
2015-09-18 16:07:34 +08:00
|
|
|
CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
|
|
|
|
// Ignore return result until untied tasks are supported.
|
2016-03-29 13:34:15 +08:00
|
|
|
auto *Result = CGF.EmitRuntimeCall(
|
|
|
|
RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
|
2015-09-18 16:07:34 +08:00
|
|
|
// if (__kmpc_cancel()) {
|
|
|
|
// exit from construct;
|
|
|
|
// }
|
|
|
|
auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
|
|
|
|
auto *ContBB = CGF.createBasicBlock(".cancel.continue");
|
|
|
|
auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
|
|
|
|
CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
|
|
|
|
CGF.EmitBlock(ExitBB);
|
|
|
|
// exit from construct;
|
|
|
|
auto CancelDest =
|
|
|
|
CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
|
|
|
|
CGF.EmitBranchThroughCleanup(CancelDest);
|
|
|
|
CGF.EmitBlock(ContBB, /*IsFinished=*/true);
|
|
|
|
};
|
|
|
|
if (IfCond)
|
2016-03-29 13:34:15 +08:00
|
|
|
emitOMPIfClause(CGF, IfCond, ThenGen,
|
|
|
|
[](CodeGenFunction &, PrePostActionTy &) {});
|
|
|
|
else {
|
|
|
|
RegionCodeGenTy ThenRCG(ThenGen);
|
|
|
|
ThenRCG(CGF);
|
|
|
|
}
|
2015-07-06 13:50:32 +08:00
|
|
|
}
|
|
|
|
}
|
2015-10-03 00:14:20 +08:00
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
/// \brief Obtain information that uniquely identifies a target entry. This
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
/// consists of the file and device IDs as well as line number associated with
|
|
|
|
/// the relevant entry source location.
|
2016-01-06 21:42:12 +08:00
|
|
|
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
|
|
|
|
unsigned &DeviceID, unsigned &FileID,
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
unsigned &LineNum) {
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
auto &SM = C.getSourceManager();
|
|
|
|
|
|
|
|
// The loc should be always valid and have a file ID (the user cannot use
|
|
|
|
// #pragma directives in macros)
|
|
|
|
|
|
|
|
assert(Loc.isValid() && "Source location is expected to be always valid.");
|
|
|
|
assert(Loc.isFileID() && "Source location is expected to refer to a file.");
|
|
|
|
|
|
|
|
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
|
|
|
|
assert(PLoc.isValid() && "Source location is expected to be always valid.");
|
|
|
|
|
|
|
|
llvm::sys::fs::UniqueID ID;
|
|
|
|
if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
|
|
|
|
llvm_unreachable("Source file with target region no longer exists!");
|
|
|
|
|
|
|
|
DeviceID = ID.getDevice();
|
|
|
|
FileID = ID.getFile();
|
|
|
|
LineNum = PLoc.getLine();
|
|
|
|
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::emitTargetOutlinedFunction(
|
|
|
|
const OMPExecutableDirective &D, StringRef ParentName,
|
|
|
|
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
|
2016-03-29 13:34:15 +08:00
|
|
|
bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
|
2016-01-06 21:42:12 +08:00
|
|
|
assert(!ParentName.empty() && "Invalid target region parent name!");
|
|
|
|
|
2016-03-22 09:48:56 +08:00
|
|
|
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
|
|
|
|
IsOffloadEntry, CodeGen);
|
|
|
|
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
|
|
|
|
const OMPExecutableDirective &D, StringRef ParentName,
|
|
|
|
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
|
|
|
|
bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
// Create a unique name for the entry function using the source location
|
|
|
|
// information of the current target region. The name will be something like:
|
2016-01-06 21:42:12 +08:00
|
|
|
//
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
// __omp_offloading_DD_FFFF_PP_lBB
|
2016-01-06 21:42:12 +08:00
|
|
|
//
|
|
|
|
// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
// mangled name of the function that encloses the target region and BB is the
|
|
|
|
// line number of the target region.
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
unsigned DeviceID;
|
|
|
|
unsigned FileID;
|
|
|
|
unsigned Line;
|
|
|
|
getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
Line);
|
2016-01-06 21:42:12 +08:00
|
|
|
SmallString<64> EntryFnName;
|
|
|
|
{
|
|
|
|
llvm::raw_svector_ostream OS(EntryFnName);
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
|
|
|
|
<< llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
|
2016-01-06 21:42:12 +08:00
|
|
|
}
|
|
|
|
|
2016-03-22 09:48:56 +08:00
|
|
|
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
|
|
|
|
|
2015-10-03 00:14:20 +08:00
|
|
|
CodeGenFunction CGF(CGM, true);
|
2016-01-06 21:42:12 +08:00
|
|
|
CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
|
2015-10-03 00:14:20 +08:00
|
|
|
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
|
2016-01-06 21:42:12 +08:00
|
|
|
|
2016-06-17 02:39:34 +08:00
|
|
|
OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
// If this target outline function is not an offload entry, we don't need to
|
|
|
|
// register it.
|
|
|
|
if (!IsOffloadEntry)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// The target region ID is used by the runtime library to identify the current
|
|
|
|
// target region, so it only has to be unique and not necessarily point to
|
|
|
|
// anything. It could be the pointer to the outlined function that implements
|
|
|
|
// the target region, but we aren't using that so that the compiler doesn't
|
|
|
|
// need to keep that, and could therefore inline the host function if proven
|
|
|
|
// worthwhile during optimization. In the other hand, if emitting code for the
|
|
|
|
// device, the ID has to be the function address so that it can retrieved from
|
|
|
|
// the offloading entry and launched by the runtime library. We also mark the
|
|
|
|
// outlined function to have external linkage in case we are emitting code for
|
|
|
|
// the device, because these functions will be entry points to the device.
|
|
|
|
|
|
|
|
if (CGM.getLangOpts().OpenMPIsDevice) {
|
|
|
|
OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
|
|
|
|
OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
|
|
|
|
} else
|
|
|
|
OutlinedFnID = new llvm::GlobalVariable(
|
|
|
|
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
|
|
|
|
llvm::GlobalValue::PrivateLinkage,
|
|
|
|
llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
|
|
|
|
|
|
|
|
// Register the information for the entry associated with this target region.
|
|
|
|
OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
|
[OpenMP] Add fields for flags in the offload entry descriptor.
Summary:
This patch adds two fields to the offload entry descriptor. One field is meant to signal Ctors/Dtors and `link` global variables, and the other is reserved for runtime library use.
Currently, these fields are only filled with zeros in the current code generation, but that will change when `declare target` is added.
The reason, we are adding these fields now is to make the code generation consistent with the runtime library proposal under review in https://reviews.llvm.org/D14031.
Reviewers: ABataev, hfinkel, carlo.bertolli, kkwli0, arpith-jacob, Hahnfeld
Subscribers: cfe-commits, caomhin, jholewinski
Differential Revision: https://reviews.llvm.org/D28298
llvm-svn: 291124
2017-01-06 00:02:49 +08:00
|
|
|
DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
|
|
|
|
/*Flags=*/0);
|
2015-10-03 00:14:20 +08:00
|
|
|
}
|
|
|
|
|
2016-04-29 09:37:30 +08:00
|
|
|
/// discard all CompoundStmts intervening between two constructs
|
|
|
|
static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
|
|
|
|
while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
|
|
|
|
Body = CS->body_front();
|
|
|
|
|
|
|
|
return Body;
|
|
|
|
}
|
|
|
|
|
2017-01-25 08:57:16 +08:00
|
|
|
/// Emit the number of teams for a target directive. Inspect the num_teams
|
|
|
|
/// clause associated with a teams construct combined or closely nested
|
|
|
|
/// with the target directive.
|
|
|
|
///
|
|
|
|
/// Emit a team of size one for directives such as 'target parallel' that
|
|
|
|
/// have no associated teams construct.
|
|
|
|
///
|
|
|
|
/// Otherwise, return nullptr.
|
2016-03-04 00:20:23 +08:00
|
|
|
static llvm::Value *
|
2017-01-25 08:57:16 +08:00
|
|
|
emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
|
|
|
|
CodeGenFunction &CGF,
|
|
|
|
const OMPExecutableDirective &D) {
|
2016-03-04 00:20:23 +08:00
|
|
|
|
|
|
|
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
|
|
|
|
"teams directive expected to be "
|
|
|
|
"emitted only for the host!");
|
|
|
|
|
2017-01-25 10:18:43 +08:00
|
|
|
auto &Bld = CGF.Builder;
|
|
|
|
|
|
|
|
// If the target directive is combined with a teams directive:
|
|
|
|
// Return the value in the num_teams clause, if any.
|
|
|
|
// Otherwise, return 0 to denote the runtime default.
|
|
|
|
if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
|
|
|
|
if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
|
|
|
|
CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
|
|
|
|
auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
|
|
|
|
/*IgnoreResultAssign*/ true);
|
|
|
|
return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
|
|
|
|
/*IsSigned=*/true);
|
|
|
|
}
|
|
|
|
|
|
|
|
// The default value is 0.
|
|
|
|
return Bld.getInt32(0);
|
|
|
|
}
|
|
|
|
|
2017-01-25 08:57:16 +08:00
|
|
|
// If the target directive is combined with a parallel directive but not a
|
|
|
|
// teams directive, start one team.
|
2017-01-25 10:18:43 +08:00
|
|
|
if (isOpenMPParallelDirective(D.getDirectiveKind()))
|
|
|
|
return Bld.getInt32(1);
|
2016-03-04 00:20:23 +08:00
|
|
|
|
|
|
|
// If the current target region has a teams region enclosed, we need to get
|
|
|
|
// the number of teams to pass to the runtime function call. This is done
|
|
|
|
// by generating the expression in a inlined region. This is required because
|
|
|
|
// the expression is captured in the enclosing target environment when the
|
|
|
|
// teams directive is not combined with target.
|
|
|
|
|
|
|
|
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
|
|
|
|
|
|
|
|
// FIXME: Accommodate other combined directives with teams when they become
|
|
|
|
// available.
|
2016-04-29 09:37:30 +08:00
|
|
|
if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
|
|
|
|
ignoreCompoundStmts(CS.getCapturedStmt()))) {
|
2016-03-04 00:20:23 +08:00
|
|
|
if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
|
|
|
|
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
|
|
|
|
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
|
|
|
|
llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
|
2017-01-25 10:18:43 +08:00
|
|
|
return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
|
|
|
|
/*IsSigned=*/true);
|
2016-03-04 00:20:23 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// If we have an enclosed teams directive but no num_teams clause we use
|
|
|
|
// the default value 0.
|
2017-01-25 10:18:43 +08:00
|
|
|
return Bld.getInt32(0);
|
2016-03-04 00:20:23 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// No teams associated with the directive.
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2017-01-25 08:57:16 +08:00
|
|
|
/// Emit the number of threads for a target directive. Inspect the
|
|
|
|
/// thread_limit clause associated with a teams construct combined or closely
|
|
|
|
/// nested with the target directive.
|
|
|
|
///
|
|
|
|
/// Emit the num_threads clause for directives such as 'target parallel' that
|
|
|
|
/// have no associated teams construct.
|
|
|
|
///
|
|
|
|
/// Otherwise, return nullptr.
|
2016-03-04 00:20:23 +08:00
|
|
|
static llvm::Value *
|
2017-01-25 08:57:16 +08:00
|
|
|
emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
|
|
|
|
CodeGenFunction &CGF,
|
|
|
|
const OMPExecutableDirective &D) {
|
2016-03-04 00:20:23 +08:00
|
|
|
|
|
|
|
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
|
|
|
|
"teams directive expected to be "
|
|
|
|
"emitted only for the host!");
|
|
|
|
|
2017-01-25 08:57:16 +08:00
|
|
|
auto &Bld = CGF.Builder;
|
|
|
|
|
|
|
|
//
|
|
|
|
// If the target directive is combined with a teams directive:
|
|
|
|
// Return the value in the thread_limit clause, if any.
|
|
|
|
//
|
|
|
|
// If the target directive is combined with a parallel directive:
|
|
|
|
// Return the value in the num_threads clause, if any.
|
|
|
|
//
|
|
|
|
// If both clauses are set, select the minimum of the two.
|
|
|
|
//
|
|
|
|
// If neither teams or parallel combined directives set the number of threads
|
|
|
|
// in a team, return 0 to denote the runtime default.
|
|
|
|
//
|
|
|
|
// If this is not a teams directive return nullptr.
|
|
|
|
|
2017-01-25 10:18:43 +08:00
|
|
|
if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
|
|
|
|
isOpenMPParallelDirective(D.getDirectiveKind())) {
|
2017-01-25 08:57:16 +08:00
|
|
|
llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
|
|
|
|
llvm::Value *NumThreadsVal = nullptr;
|
2017-01-25 10:18:43 +08:00
|
|
|
llvm::Value *ThreadLimitVal = nullptr;
|
|
|
|
|
|
|
|
if (const auto *ThreadLimitClause =
|
|
|
|
D.getSingleClause<OMPThreadLimitClause>()) {
|
|
|
|
CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
|
|
|
|
auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
|
|
|
|
/*IgnoreResultAssign*/ true);
|
|
|
|
ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
|
|
|
|
/*IsSigned=*/true);
|
|
|
|
}
|
2017-01-25 08:57:16 +08:00
|
|
|
|
|
|
|
if (const auto *NumThreadsClause =
|
|
|
|
D.getSingleClause<OMPNumThreadsClause>()) {
|
|
|
|
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
|
|
|
|
llvm::Value *NumThreads =
|
|
|
|
CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
|
|
|
|
/*IgnoreResultAssign*/ true);
|
|
|
|
NumThreadsVal =
|
|
|
|
Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
|
|
|
|
}
|
|
|
|
|
2017-01-25 10:18:43 +08:00
|
|
|
// Select the lesser of thread_limit and num_threads.
|
|
|
|
if (NumThreadsVal)
|
|
|
|
ThreadLimitVal = ThreadLimitVal
|
|
|
|
? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
|
|
|
|
ThreadLimitVal),
|
|
|
|
NumThreadsVal, ThreadLimitVal)
|
|
|
|
: NumThreadsVal;
|
2016-03-04 00:20:23 +08:00
|
|
|
|
2017-01-25 10:18:43 +08:00
|
|
|
// Set default value passed to the runtime if either teams or a target
|
|
|
|
// parallel type directive is found but no clause is specified.
|
|
|
|
if (!ThreadLimitVal)
|
|
|
|
ThreadLimitVal = DefaultThreadLimitVal;
|
|
|
|
|
|
|
|
return ThreadLimitVal;
|
|
|
|
}
|
2017-01-25 09:45:59 +08:00
|
|
|
|
2016-03-04 00:20:23 +08:00
|
|
|
// If the current target region has a teams region enclosed, we need to get
|
|
|
|
// the thread limit to pass to the runtime function call. This is done
|
|
|
|
// by generating the expression in a inlined region. This is required because
|
|
|
|
// the expression is captured in the enclosing target environment when the
|
|
|
|
// teams directive is not combined with target.
|
|
|
|
|
|
|
|
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
|
|
|
|
|
|
|
|
// FIXME: Accommodate other combined directives with teams when they become
|
|
|
|
// available.
|
2016-04-29 09:37:30 +08:00
|
|
|
if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
|
|
|
|
ignoreCompoundStmts(CS.getCapturedStmt()))) {
|
2016-03-04 00:20:23 +08:00
|
|
|
if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
|
|
|
|
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
|
|
|
|
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
|
|
|
|
llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
|
|
|
|
return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
|
|
|
|
/*IsSigned=*/true);
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we have an enclosed teams directive but no thread_limit clause we use
|
|
|
|
// the default value 0.
|
|
|
|
return CGF.Builder.getInt32(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// No teams associated with the directive.
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2016-04-28 06:40:57 +08:00
|
|
|
namespace {
|
|
|
|
// \brief Utility to handle information from clauses associated with a given
|
|
|
|
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
|
|
|
|
// It provides a convenient interface to obtain the information and generate
|
|
|
|
// code for that information.
|
|
|
|
class MappableExprsHandler {
|
|
|
|
public:
|
2015-10-03 00:14:20 +08:00
|
|
|
/// \brief Bit flags that describe the mapping type of an element passed to
/// the offloading runtime. Each enumerator occupies a distinct bit.
enum OpenMPOffloadMappingFlags {
  /// \brief Allocate memory on the device and copy the data from the host to
  /// the device.
  OMP_MAP_TO = 0x01,
  /// \brief Allocate memory on the device and copy the data from the device
  /// back to the host.
  OMP_MAP_FROM = 0x02,
  /// \brief Perform the requested mapping action on the element even if it
  /// was already mapped before.
  OMP_MAP_ALWAYS = 0x04,
  /// \brief Delete the element from the device environment, ignoring the
  /// current reference count associated with the element.
  OMP_MAP_DELETE = 0x08,
  /// \brief The element being mapped is a pointer, therefore the pointee
  /// should be mapped as well.
  OMP_MAP_IS_PTR = 0x10,
  /// \brief This flag signals that an argument is the first one relating to
  /// a map/private clause expression. In some cases a single
  /// map/privatization results in multiple arguments being passed to the
  /// runtime library.
  OMP_MAP_FIRST_REF = 0x20,
  /// \brief The runtime library has to return the device pointer in the
  /// current position for the data being mapped.
  OMP_MAP_RETURN_PTR = 0x40,
  /// \brief The reference being passed is a pointer to private data.
  OMP_MAP_PRIVATE_PTR = 0x80,
  /// \brief Pass the element to the device by value.
  OMP_MAP_PRIVATE_VAL = 0x100,
};
|
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
/// Class that associates information with a base pointer to be passed to the
|
|
|
|
/// runtime library.
|
|
|
|
class BasePointerInfo {
|
|
|
|
/// The base pointer.
|
|
|
|
llvm::Value *Ptr = nullptr;
|
|
|
|
/// The base declaration that refers to this device pointer, or null if
|
|
|
|
/// there is none.
|
|
|
|
const ValueDecl *DevPtrDecl = nullptr;
|
|
|
|
|
|
|
|
public:
|
|
|
|
BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
|
|
|
|
: Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
|
|
|
|
llvm::Value *operator*() const { return Ptr; }
|
|
|
|
const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
|
|
|
|
void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Array of base pointers, each optionally tagged with a device pointer
/// declaration.
using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 16>;
/// Array of plain IR values; used in this class for section pointers and
/// sizes.
using MapValuesArrayTy = SmallVector<llvm::Value *, 16>;
/// Array of map type flag words.
using MapFlagsArrayTy = SmallVector<unsigned, 16>;
|
|
|
|
|
|
|
|
private:
|
|
|
|
/// \brief Directive from where the map clauses were extracted.
|
2016-07-28 23:31:29 +08:00
|
|
|
const OMPExecutableDirective &CurDir;
|
2016-04-28 06:40:57 +08:00
|
|
|
|
|
|
|
/// \brief Function the directive is being generated for.
|
|
|
|
CodeGenFunction &CGF;
|
|
|
|
|
2016-05-27 00:53:38 +08:00
|
|
|
/// \brief Set of all first private variables in the current directive.
|
|
|
|
llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
|
|
|
|
|
2016-07-28 22:25:09 +08:00
|
|
|
/// Map between device pointer declarations and their expression components.
|
|
|
|
/// The key value for declarations in 'this' is null.
|
|
|
|
llvm::DenseMap<
|
|
|
|
const ValueDecl *,
|
|
|
|
SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
|
|
|
|
DevPointersMap;
|
|
|
|
|
2016-04-28 06:40:57 +08:00
|
|
|
/// \brief Emit the size value to be used when mapping the expression \a E.
/// For an array section the size is derived from the section itself (whole
/// base, single element, or length times element size); for any other
/// expression it is the size of its type, looking through references.
llvm::Value *getExprTypeSize(const Expr *E) const {
  // An array section is considered a built-in type, so its size cannot be
  // obtained from CGF.getTypeSize(E->getType()); compute it from the length
  // of the section instead.
  if (const auto *Section = dyn_cast<OMPArraySectionExpr>(E)) {
    QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                          Section->getBase()->IgnoreParenImpCasts())
                          .getCanonicalType();
    const Expr *Length = Section->getLength();

    // A colon with no length means the whole length of the base is used.
    if (!Length && Section->getColonLoc().isValid())
      return CGF.getTypeSize(BaseTy);

    // Determine the element type from either the pointee or the array
    // element type of the base.
    QualType ElemTy;
    if (auto *PTy = BaseTy->getAs<PointerType>()) {
      ElemTy = PTy->getPointeeType().getCanonicalType();
    } else {
      auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
      assert(ATy && "Expecting array type if not a pointer type.");
      ElemTy = ATy->getElementType().getCanonicalType();
    }
    llvm::Value *ElemSize = CGF.getTypeSize(ElemTy);

    // Still no length here: the section designates a single element.
    if (!Length)
      return ElemSize;

    // Size is the section length (converted to size_t) times the element
    // size.
    llvm::Value *LengthVal = CGF.EmitScalarExpr(Length);
    LengthVal =
        CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
    return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
  }

  // Reference types are ignored for mapping purposes: use the referenced
  // type.
  QualType ExprTy = E->getType().getCanonicalType();
  if (auto *RefTy = ExprTy->getAs<ReferenceType>())
    ExprTy = RefTy->getPointeeType().getCanonicalType();
  return CGF.getTypeSize(ExprTy);
}
|
|
|
|
|
|
|
|
/// \brief Return the corresponding bits for a given map clause modifier. Add
|
|
|
|
/// a flag marking the map as a pointer if requested. Add a flag marking the
|
2016-05-27 00:48:10 +08:00
|
|
|
/// map as the first one of a series of maps that relate to the same map
|
|
|
|
/// expression.
|
2016-04-28 06:40:57 +08:00
|
|
|
unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
|
|
|
|
OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
|
2016-05-27 00:48:10 +08:00
|
|
|
bool AddIsFirstFlag) const {
|
2016-04-28 06:40:57 +08:00
|
|
|
unsigned Bits = 0u;
|
|
|
|
switch (MapType) {
|
|
|
|
case OMPC_MAP_alloc:
|
2016-05-27 00:48:10 +08:00
|
|
|
case OMPC_MAP_release:
|
|
|
|
// alloc and release is the default behavior in the runtime library, i.e.
|
|
|
|
// if we don't pass any bits alloc/release that is what the runtime is
|
|
|
|
// going to do. Therefore, we don't need to signal anything for these two
|
|
|
|
// type modifiers.
|
2016-04-28 06:40:57 +08:00
|
|
|
break;
|
|
|
|
case OMPC_MAP_to:
|
|
|
|
Bits = OMP_MAP_TO;
|
|
|
|
break;
|
|
|
|
case OMPC_MAP_from:
|
|
|
|
Bits = OMP_MAP_FROM;
|
|
|
|
break;
|
|
|
|
case OMPC_MAP_tofrom:
|
|
|
|
Bits = OMP_MAP_TO | OMP_MAP_FROM;
|
|
|
|
break;
|
|
|
|
case OMPC_MAP_delete:
|
|
|
|
Bits = OMP_MAP_DELETE;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unexpected map type!");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (AddPtrFlag)
|
2016-05-27 00:48:10 +08:00
|
|
|
Bits |= OMP_MAP_IS_PTR;
|
|
|
|
if (AddIsFirstFlag)
|
|
|
|
Bits |= OMP_MAP_FIRST_REF;
|
2016-04-28 06:40:57 +08:00
|
|
|
if (MapTypeModifier == OMPC_MAP_always)
|
|
|
|
Bits |= OMP_MAP_ALWAYS;
|
|
|
|
return Bits;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Return true if the provided expression is a final array section. A
|
|
|
|
/// final array section, is one whose length can't be proved to be one.
|
|
|
|
bool isFinalArraySectionExpression(const Expr *E) const {
|
|
|
|
auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
|
|
|
|
|
|
|
|
// It is not an array section and therefore not a unity-size one.
|
|
|
|
if (!OASE)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// An array section with no colon always refer to a single element.
|
|
|
|
if (OASE->getColonLoc().isInvalid())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
auto *Length = OASE->getLength();
|
|
|
|
|
|
|
|
// If we don't have a length we have to check if the array has size 1
|
|
|
|
// for this dimension. Also, we should always expect a length if the
|
|
|
|
// base type is pointer.
|
|
|
|
if (!Length) {
|
|
|
|
auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
|
|
|
|
OASE->getBase()->IgnoreParenImpCasts())
|
|
|
|
.getCanonicalType();
|
|
|
|
if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
|
|
|
|
return ATy->getSize().getSExtValue() != 1;
|
|
|
|
// If we don't have a constant dimension length, we have to consider
|
|
|
|
// the current section as having any size, so it is not necessarily
|
|
|
|
// unitary. If it happen to be unity size, that's user fault.
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if the length evaluates to 1.
|
|
|
|
llvm::APSInt ConstLength;
|
|
|
|
if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
|
|
|
|
return true; // Can have more that size 1.
|
|
|
|
|
|
|
|
return ConstLength.getSExtValue() != 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Generate the base pointers, section pointers, sizes and map type
|
|
|
|
/// bits for the provided map type, map modifier, and expression components.
|
|
|
|
/// \a IsFirstComponent should be set to true if the provided set of
|
|
|
|
/// components is the first associated with a capture.
|
|
|
|
void generateInfoForComponentList(
|
|
|
|
OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
|
|
|
|
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
|
2016-07-28 22:23:26 +08:00
|
|
|
MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
|
2016-04-28 06:40:57 +08:00
|
|
|
MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
|
|
|
|
bool IsFirstComponentList) const {
|
|
|
|
|
|
|
|
// The following summarizes what has to be generated for each map and the
|
|
|
|
// types bellow. The generated information is expressed in this order:
|
|
|
|
// base pointer, section pointer, size, flags
|
|
|
|
// (to add to the ones that come from the map type and modifier).
|
|
|
|
//
|
|
|
|
// double d;
|
|
|
|
// int i[100];
|
|
|
|
// float *p;
|
|
|
|
//
|
|
|
|
// struct S1 {
|
|
|
|
// int i;
|
|
|
|
// float f[50];
|
|
|
|
// }
|
|
|
|
// struct S2 {
|
|
|
|
// int i;
|
|
|
|
// float f[50];
|
|
|
|
// S1 s;
|
|
|
|
// double *p;
|
|
|
|
// struct S2 *ps;
|
|
|
|
// }
|
|
|
|
// S2 s;
|
|
|
|
// S2 *ps;
|
|
|
|
//
|
|
|
|
// map(d)
|
|
|
|
// &d, &d, sizeof(double), noflags
|
|
|
|
//
|
|
|
|
// map(i)
|
|
|
|
// &i, &i, 100*sizeof(int), noflags
|
|
|
|
//
|
|
|
|
// map(i[1:23])
|
|
|
|
// &i(=&i[0]), &i[1], 23*sizeof(int), noflags
|
|
|
|
//
|
|
|
|
// map(p)
|
|
|
|
// &p, &p, sizeof(float*), noflags
|
|
|
|
//
|
|
|
|
// map(p[1:24])
|
|
|
|
// p, &p[1], 24*sizeof(float), noflags
|
|
|
|
//
|
|
|
|
// map(s)
|
|
|
|
// &s, &s, sizeof(S2), noflags
|
|
|
|
//
|
|
|
|
// map(s.i)
|
|
|
|
// &s, &(s.i), sizeof(int), noflags
|
|
|
|
//
|
|
|
|
// map(s.s.f)
|
|
|
|
// &s, &(s.i.f), 50*sizeof(int), noflags
|
|
|
|
//
|
|
|
|
// map(s.p)
|
|
|
|
// &s, &(s.p), sizeof(double*), noflags
|
|
|
|
//
|
|
|
|
// map(s.p[:22], s.a s.b)
|
|
|
|
// &s, &(s.p), sizeof(double*), noflags
|
|
|
|
// &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
|
|
|
|
//
|
|
|
|
// map(s.ps)
|
|
|
|
// &s, &(s.ps), sizeof(S2*), noflags
|
|
|
|
//
|
|
|
|
// map(s.ps->s.i)
|
|
|
|
// &s, &(s.ps), sizeof(S2*), noflags
|
|
|
|
// &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
|
|
|
|
//
|
|
|
|
// map(s.ps->ps)
|
|
|
|
// &s, &(s.ps), sizeof(S2*), noflags
|
|
|
|
// &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
|
|
|
|
//
|
|
|
|
// map(s.ps->ps->ps)
|
|
|
|
// &s, &(s.ps), sizeof(S2*), noflags
|
|
|
|
// &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
|
|
|
|
// &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
|
|
|
|
//
|
|
|
|
// map(s.ps->ps->s.f[:22])
|
|
|
|
// &s, &(s.ps), sizeof(S2*), noflags
|
|
|
|
// &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
|
|
|
|
// &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
|
|
|
|
//
|
|
|
|
// map(ps)
|
|
|
|
// &ps, &ps, sizeof(S2*), noflags
|
|
|
|
//
|
|
|
|
// map(ps->i)
|
|
|
|
// ps, &(ps->i), sizeof(int), noflags
|
|
|
|
//
|
|
|
|
// map(ps->s.f)
|
|
|
|
// ps, &(ps->s.f[0]), 50*sizeof(float), noflags
|
|
|
|
//
|
|
|
|
// map(ps->p)
|
|
|
|
// ps, &(ps->p), sizeof(double*), noflags
|
|
|
|
//
|
|
|
|
// map(ps->p[:22])
|
|
|
|
// ps, &(ps->p), sizeof(double*), noflags
|
|
|
|
// &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
|
|
|
|
//
|
|
|
|
// map(ps->ps)
|
|
|
|
// ps, &(ps->ps), sizeof(S2*), noflags
|
|
|
|
//
|
|
|
|
// map(ps->ps->s.i)
|
|
|
|
// ps, &(ps->ps), sizeof(S2*), noflags
|
|
|
|
// &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
|
|
|
|
//
|
|
|
|
// map(ps->ps->ps)
|
|
|
|
// ps, &(ps->ps), sizeof(S2*), noflags
|
|
|
|
// &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
|
|
|
|
//
|
|
|
|
// map(ps->ps->ps->ps)
|
|
|
|
// ps, &(ps->ps), sizeof(S2*), noflags
|
|
|
|
// &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
|
|
|
|
// &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
|
|
|
|
//
|
|
|
|
// map(ps->ps->ps->s.f[:22])
|
|
|
|
// ps, &(ps->ps), sizeof(S2*), noflags
|
|
|
|
// &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
|
|
|
|
// &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
|
|
|
|
// extra_flag
|
|
|
|
|
|
|
|
// Track if the map information being generated is the first for a capture.
|
|
|
|
bool IsCaptureFirstInfo = IsFirstComponentList;
|
|
|
|
|
|
|
|
// Scan the components from the base to the complete expression.
|
|
|
|
auto CI = Components.rbegin();
|
|
|
|
auto CE = Components.rend();
|
|
|
|
auto I = CI;
|
|
|
|
|
|
|
|
// Track if the map information being generated is the first for a list of
|
|
|
|
// components.
|
|
|
|
bool IsExpressionFirstInfo = true;
|
|
|
|
llvm::Value *BP = nullptr;
|
|
|
|
|
|
|
|
if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
|
|
|
|
// The base is the 'this' pointer. The content of the pointer is going
|
|
|
|
// to be the base of the field being mapped.
|
|
|
|
BP = CGF.EmitScalarExpr(ME->getBase());
|
|
|
|
} else {
|
|
|
|
// The base is the reference to the variable.
|
|
|
|
// BP = &Var.
|
|
|
|
BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
|
|
|
|
.getPointer();
|
|
|
|
|
|
|
|
// If the variable is a pointer and is being dereferenced (i.e. is not
|
2016-06-11 02:53:04 +08:00
|
|
|
// the last component), the base has to be the pointer itself, not its
|
2016-07-28 06:49:49 +08:00
|
|
|
// reference. References are ignored for mapping purposes.
|
|
|
|
QualType Ty =
|
|
|
|
I->getAssociatedDeclaration()->getType().getNonReferenceType();
|
|
|
|
if (Ty->isAnyPointerType() && std::next(I) != CE) {
|
|
|
|
auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
|
2016-04-28 06:40:57 +08:00
|
|
|
BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
|
2016-07-28 06:49:49 +08:00
|
|
|
Ty->castAs<PointerType>())
|
2016-04-28 06:40:57 +08:00
|
|
|
.getPointer();
|
|
|
|
|
|
|
|
// We do not need to generate individual map information for the
|
|
|
|
// pointer, it can be associated with the combined storage.
|
|
|
|
++I;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (; I != CE; ++I) {
|
|
|
|
auto Next = std::next(I);
|
|
|
|
|
|
|
|
// We need to generate the addresses and sizes if this is the last
|
|
|
|
// component, if the component is a pointer or if it is an array section
|
|
|
|
// whose length can't be proved to be one. If this is a pointer, it
|
|
|
|
// becomes the base address for the following components.
|
|
|
|
|
|
|
|
// A final array section, is one whose length can't be proved to be one.
|
|
|
|
bool IsFinalArraySection =
|
|
|
|
isFinalArraySectionExpression(I->getAssociatedExpression());
|
|
|
|
|
|
|
|
// Get information on whether the element is a pointer. Have to do a
|
|
|
|
// special treatment for array sections given that they are built-in
|
|
|
|
// types.
|
|
|
|
const auto *OASE =
|
|
|
|
dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
|
|
|
|
bool IsPointer =
|
|
|
|
(OASE &&
|
|
|
|
OMPArraySectionExpr::getBaseOriginalType(OASE)
|
|
|
|
.getCanonicalType()
|
|
|
|
->isAnyPointerType()) ||
|
|
|
|
I->getAssociatedExpression()->getType()->isAnyPointerType();
|
|
|
|
|
|
|
|
if (Next == CE || IsPointer || IsFinalArraySection) {
|
|
|
|
|
|
|
|
// If this is not the last component, we expect the pointer to be
|
|
|
|
// associated with an array expression or member expression.
|
|
|
|
assert((Next == CE ||
|
|
|
|
isa<MemberExpr>(Next->getAssociatedExpression()) ||
|
|
|
|
isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
|
|
|
|
isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
|
|
|
|
"Unexpected expression");
|
|
|
|
|
|
|
|
auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
|
|
|
|
auto *Size = getExprTypeSize(I->getAssociatedExpression());
|
|
|
|
|
2016-07-28 06:52:16 +08:00
|
|
|
// If we have a member expression and the current component is a
|
|
|
|
// reference, we have to map the reference too. Whenever we have a
|
|
|
|
// reference, the section that reference refers to is going to be a
|
|
|
|
// load instruction from the storage assigned to the reference.
|
|
|
|
if (isa<MemberExpr>(I->getAssociatedExpression()) &&
|
|
|
|
I->getAssociatedDeclaration()->getType()->isReferenceType()) {
|
|
|
|
auto *LI = cast<llvm::LoadInst>(LB);
|
|
|
|
auto *RefAddr = LI->getPointerOperand();
|
|
|
|
|
|
|
|
BasePointers.push_back(BP);
|
|
|
|
Pointers.push_back(RefAddr);
|
|
|
|
Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
|
|
|
|
Types.push_back(getMapTypeBits(
|
|
|
|
/*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown,
|
|
|
|
!IsExpressionFirstInfo, IsCaptureFirstInfo));
|
|
|
|
IsExpressionFirstInfo = false;
|
|
|
|
IsCaptureFirstInfo = false;
|
|
|
|
// The reference will be the next base address.
|
|
|
|
BP = RefAddr;
|
|
|
|
}
|
|
|
|
|
|
|
|
BasePointers.push_back(BP);
|
2016-04-28 06:40:57 +08:00
|
|
|
Pointers.push_back(LB);
|
|
|
|
Sizes.push_back(Size);
|
2016-07-28 06:52:16 +08:00
|
|
|
|
2016-05-27 00:48:10 +08:00
|
|
|
// We need to add a pointer flag for each map that comes from the
|
|
|
|
// same expression except for the first one. We also need to signal
|
|
|
|
// this map is the first one that relates with the current capture
|
|
|
|
// (there is a set of entries for each capture).
|
2016-04-28 06:40:57 +08:00
|
|
|
Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
|
|
|
|
!IsExpressionFirstInfo,
|
2016-05-27 00:48:10 +08:00
|
|
|
IsCaptureFirstInfo));
|
2016-04-28 06:40:57 +08:00
|
|
|
|
|
|
|
// If we have a final array section, we are done with this expression.
|
|
|
|
if (IsFinalArraySection)
|
|
|
|
break;
|
|
|
|
|
|
|
|
// The pointer becomes the base for the next element.
|
|
|
|
if (Next != CE)
|
|
|
|
BP = LB;
|
|
|
|
|
|
|
|
IsExpressionFirstInfo = false;
|
|
|
|
IsCaptureFirstInfo = false;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-05-27 00:53:38 +08:00
|
|
|
/// \brief Return the adjusted map modifiers if the declaration a capture
|
|
|
|
/// refers to appears in a first-private clause. This is expected to be used
|
|
|
|
/// only with directives that start with 'target'.
|
|
|
|
unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
|
|
|
|
unsigned CurrentModifiers) {
|
|
|
|
assert(Cap.capturesVariable() && "Expected capture by reference only!");
|
|
|
|
|
|
|
|
// A first private variable captured by reference will use only the
|
|
|
|
// 'private ptr' and 'map to' flag. Return the right flags if the captured
|
|
|
|
// declaration is known as first-private in this handler.
|
|
|
|
if (FirstPrivateDecls.count(Cap.getCapturedVar()))
|
|
|
|
return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
|
|
|
|
MappableExprsHandler::OMP_MAP_TO;
|
|
|
|
|
|
|
|
// We didn't modify anything.
|
|
|
|
return CurrentModifiers;
|
|
|
|
}
|
|
|
|
|
2016-04-28 06:40:57 +08:00
|
|
|
public:
|
|
|
|
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
|
2016-07-28 23:31:29 +08:00
|
|
|
: CurDir(Dir), CGF(CGF) {
|
2016-05-27 00:53:38 +08:00
|
|
|
// Extract firstprivate clause information.
|
|
|
|
for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
|
|
|
|
for (const auto *D : C->varlists())
|
|
|
|
FirstPrivateDecls.insert(
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
|
2016-07-28 22:25:09 +08:00
|
|
|
// Extract device pointer clause information.
|
|
|
|
for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
|
|
|
|
for (auto L : C->component_lists())
|
|
|
|
DevPointersMap[L.first].push_back(L.second);
|
2016-05-27 00:53:38 +08:00
|
|
|
}
|
2016-04-28 06:40:57 +08:00
|
|
|
|
|
|
|
/// \brief Generate all the base pointers, section pointers, sizes and map
|
2016-07-28 22:23:26 +08:00
|
|
|
/// types for the extracted mappable expressions. Also, for each item that
|
|
|
|
/// relates with a device pointer, a pair of the relevant declaration and
|
|
|
|
/// index where it occurs is appended to the device pointers info array.
|
|
|
|
void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
|
2016-04-28 06:40:57 +08:00
|
|
|
MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
|
|
|
|
MapFlagsArrayTy &Types) const {
|
|
|
|
BasePointers.clear();
|
|
|
|
Pointers.clear();
|
|
|
|
Sizes.clear();
|
|
|
|
Types.clear();
|
|
|
|
|
|
|
|
struct MapInfo {
|
2016-07-28 22:23:26 +08:00
|
|
|
/// Kind that defines how a device pointer has to be returned.
|
|
|
|
enum ReturnPointerKind {
|
|
|
|
// Don't have to return any pointer.
|
|
|
|
RPK_None,
|
|
|
|
// Pointer is the base of the declaration.
|
|
|
|
RPK_Base,
|
|
|
|
// Pointer is a member of the base declaration - 'this'
|
|
|
|
RPK_Member,
|
|
|
|
// Pointer is a reference and a member of the base declaration - 'this'
|
|
|
|
RPK_MemberReference,
|
|
|
|
};
|
2016-04-28 06:40:57 +08:00
|
|
|
OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
|
2016-07-30 08:41:37 +08:00
|
|
|
OpenMPMapClauseKind MapType;
|
|
|
|
OpenMPMapClauseKind MapTypeModifier;
|
|
|
|
ReturnPointerKind ReturnDevicePointer;
|
|
|
|
|
|
|
|
MapInfo()
|
|
|
|
: MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown),
|
|
|
|
ReturnDevicePointer(RPK_None) {}
|
2016-07-28 22:23:26 +08:00
|
|
|
MapInfo(
|
|
|
|
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
|
|
|
|
OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
|
|
|
|
ReturnPointerKind ReturnDevicePointer)
|
|
|
|
: Components(Components), MapType(MapType),
|
|
|
|
MapTypeModifier(MapTypeModifier),
|
|
|
|
ReturnDevicePointer(ReturnDevicePointer) {}
|
2016-04-28 06:40:57 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
// We have to process the component lists that relate with the same
|
|
|
|
// declaration in a single chunk so that we can generate the map flags
|
|
|
|
// correctly. Therefore, we organize all lists in a map.
|
2017-06-27 23:46:42 +08:00
|
|
|
llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
|
2016-05-27 02:30:22 +08:00
|
|
|
|
|
|
|
// Helper function to fill the information map for the different supported
|
|
|
|
// clauses.
|
2016-07-28 22:23:26 +08:00
|
|
|
auto &&InfoGen = [&Info](
|
|
|
|
const ValueDecl *D,
|
|
|
|
OMPClauseMappableExprCommon::MappableExprComponentListRef L,
|
|
|
|
OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
|
2016-07-28 22:47:35 +08:00
|
|
|
MapInfo::ReturnPointerKind ReturnDevicePointer) {
|
2016-07-28 22:23:26 +08:00
|
|
|
const ValueDecl *VD =
|
|
|
|
D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
|
|
|
|
Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer});
|
|
|
|
};
|
2016-05-27 02:30:22 +08:00
|
|
|
|
2016-08-02 06:12:46 +08:00
|
|
|
// FIXME: MSVC 2013 seems to require this-> to find member CurDir.
|
2016-07-30 04:46:16 +08:00
|
|
|
for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
|
2016-05-27 02:30:22 +08:00
|
|
|
for (auto L : C->component_lists())
|
2016-07-28 22:47:35 +08:00
|
|
|
InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
|
|
|
|
MapInfo::RPK_None);
|
2016-07-30 04:46:16 +08:00
|
|
|
for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
|
2016-05-27 02:30:22 +08:00
|
|
|
for (auto L : C->component_lists())
|
2016-07-28 22:47:35 +08:00
|
|
|
InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
|
|
|
|
MapInfo::RPK_None);
|
2016-07-30 04:46:16 +08:00
|
|
|
for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
|
2016-05-27 02:30:22 +08:00
|
|
|
for (auto L : C->component_lists())
|
2016-07-28 22:47:35 +08:00
|
|
|
InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
|
|
|
|
MapInfo::RPK_None);
|
2016-04-28 06:40:57 +08:00
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
// Look at the use_device_ptr clause information and mark the existing map
|
|
|
|
// entries as such. If there is no map information for an entry in the
|
|
|
|
// use_device_ptr list, we create one with map type 'alloc' and zero size
|
|
|
|
// section. It is the user fault if that was not mapped before.
|
2016-08-02 06:12:46 +08:00
|
|
|
// FIXME: MSVC 2013 seems to require this-> to find member CurDir.
|
2016-07-30 04:46:16 +08:00
|
|
|
for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
|
2016-07-28 22:23:26 +08:00
|
|
|
for (auto L : C->component_lists()) {
|
|
|
|
assert(!L.second.empty() && "Not expecting empty list of components!");
|
|
|
|
const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
|
|
|
|
VD = cast<ValueDecl>(VD->getCanonicalDecl());
|
|
|
|
auto *IE = L.second.back().getAssociatedExpression();
|
|
|
|
// If the first component is a member expression, we have to look into
|
|
|
|
// 'this', which maps to null in the map of map information. Otherwise
|
|
|
|
// look directly for the information.
|
|
|
|
auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
|
|
|
|
|
|
|
|
// We potentially have map information for this declaration already.
|
|
|
|
// Look for the first set of components that refer to it.
|
|
|
|
if (It != Info.end()) {
|
|
|
|
auto CI = std::find_if(
|
|
|
|
It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
|
|
|
|
return MI.Components.back().getAssociatedDeclaration() == VD;
|
|
|
|
});
|
|
|
|
// If we found a map entry, signal that the pointer has to be returned
|
|
|
|
// and move on to the next declaration.
|
|
|
|
if (CI != It->second.end()) {
|
|
|
|
CI->ReturnDevicePointer = isa<MemberExpr>(IE)
|
|
|
|
? (VD->getType()->isReferenceType()
|
|
|
|
? MapInfo::RPK_MemberReference
|
|
|
|
: MapInfo::RPK_Member)
|
|
|
|
: MapInfo::RPK_Base;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// We didn't find any match in our map information - generate a zero
|
|
|
|
// size array section.
|
2016-08-02 06:12:46 +08:00
|
|
|
// FIXME: MSVC 2013 seems to require this-> to find member CGF.
|
2016-07-28 22:23:26 +08:00
|
|
|
llvm::Value *Ptr =
|
2016-07-30 04:46:16 +08:00
|
|
|
this->CGF
|
|
|
|
.EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation())
|
2016-07-28 22:23:26 +08:00
|
|
|
.getScalarVal();
|
|
|
|
BasePointers.push_back({Ptr, VD});
|
|
|
|
Pointers.push_back(Ptr);
|
2016-07-30 04:46:16 +08:00
|
|
|
Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
|
2016-07-28 22:23:26 +08:00
|
|
|
Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF);
|
|
|
|
}
|
|
|
|
|
2016-04-28 06:40:57 +08:00
|
|
|
for (auto &M : Info) {
|
|
|
|
// We need to know when we generate information for the first component
|
|
|
|
// associated with a capture, because the mapping flags depend on it.
|
|
|
|
bool IsFirstComponentList = true;
|
|
|
|
for (MapInfo &L : M.second) {
|
|
|
|
assert(!L.Components.empty() &&
|
|
|
|
"Not expecting declaration with no component lists.");
|
2016-07-28 22:23:26 +08:00
|
|
|
|
|
|
|
// Remember the current base pointer index.
|
|
|
|
unsigned CurrentBasePointersIdx = BasePointers.size();
|
2016-08-02 06:12:46 +08:00
|
|
|
// FIXME: MSVC 2013 seems to require this-> to find the member method.
|
2016-07-30 04:46:16 +08:00
|
|
|
this->generateInfoForComponentList(L.MapType, L.MapTypeModifier,
|
|
|
|
L.Components, BasePointers, Pointers,
|
|
|
|
Sizes, Types, IsFirstComponentList);
|
2016-07-28 22:23:26 +08:00
|
|
|
|
|
|
|
// If this entry relates with a device pointer, set the relevant
|
|
|
|
// declaration and add the 'return pointer' flag.
|
|
|
|
if (IsFirstComponentList &&
|
|
|
|
L.ReturnDevicePointer != MapInfo::RPK_None) {
|
|
|
|
// If the pointer is not the base of the map, we need to skip the
|
|
|
|
// base. If it is a reference in a member field, we also need to skip
|
|
|
|
// the map of the reference.
|
|
|
|
if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
|
|
|
|
++CurrentBasePointersIdx;
|
|
|
|
if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
|
|
|
|
++CurrentBasePointersIdx;
|
|
|
|
}
|
|
|
|
assert(BasePointers.size() > CurrentBasePointersIdx &&
|
|
|
|
"Unexpected number of mapped base pointers.");
|
|
|
|
|
|
|
|
auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
|
|
|
|
assert(RelevantVD &&
|
|
|
|
"No relevant declaration related with device pointer??");
|
|
|
|
|
|
|
|
BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
|
|
|
|
Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR;
|
|
|
|
}
|
2016-04-28 06:40:57 +08:00
|
|
|
IsFirstComponentList = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Generate the base pointers, section pointers, sizes and map types
|
|
|
|
/// associated to a given capture.
|
|
|
|
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg,
                            MapBaseValuesArrayTy &BasePointers,
                            MapValuesArrayTy &Pointers,
                            MapValuesArrayTy &Sizes,
                            MapFlagsArrayTy &Types) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // The output arrays are rebuilt from scratch for every capture.
  BasePointers.clear();
  Pointers.clear();
  Sizes.clear();
  Types.clear();

  // The mapping flags of a component list depend on whether it is the first
  // list emitted for this capture, so track that across all loops below.
  bool FirstList = true;

  // A capture of 'this' has no declaration; it is represented by a null key.
  const ValueDecl *VD = nullptr;
  if (!Cap->capturesThis())
    VD = cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());

  // Handle declarations that appear in an is_device_ptr clause. A reference
  // to a declaration is simply passed by value; a member expression (the
  // 'this' capture, i.e. the null key) has its field mapped 'to'.
  if (VD) {
    if (DevPointersMap.count(VD)) {
      BasePointers.push_back({Arg, VD});
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF);
      return;
    }
  } else {
    auto DevPtrIt = DevPointersMap.find(VD);
    if (DevPtrIt != DevPointersMap.end()) {
      for (auto Components : DevPtrIt->second) {
        generateInfoForComponentList(
            /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown,
            Components, BasePointers, Pointers, Sizes, Types, FirstList);
        FirstList = false;
      }
      return;
    }
  }

  // Otherwise collect the information from every map clause that mentions
  // the declaration.
  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
  for (auto *MapClause : this->CurDir.getClausesOfKind<OMPMapClause>()) {
    for (auto DeclComponents : MapClause->decl_component_lists(VD)) {
      assert(DeclComponents.first == VD &&
             "We got information for the wrong declaration??");
      assert(!DeclComponents.second.empty() &&
             "Not expecting declaration with no component lists.");
      generateInfoForComponentList(
          MapClause->getMapType(), MapClause->getMapTypeModifier(),
          DeclComponents.second, BasePointers, Pointers, Sizes, Types,
          FirstList);
      FirstList = false;
    }
  }
}
|
2016-05-27 00:53:38 +08:00
|
|
|
|
|
|
|
/// \brief Generate the default map information for a given capture \a CI,
|
|
|
|
/// record field declaration \a RI and captured value \a CV.
|
2016-07-28 22:23:26 +08:00
|
|
|
void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
|
|
|
|
const FieldDecl &RI, llvm::Value *CV,
|
|
|
|
MapBaseValuesArrayTy &CurBasePointers,
|
|
|
|
MapValuesArrayTy &CurPointers,
|
|
|
|
MapValuesArrayTy &CurSizes,
|
|
|
|
MapFlagsArrayTy &CurMapTypes) {
|
2016-05-27 00:53:38 +08:00
|
|
|
|
|
|
|
// Do the default mapping.
|
|
|
|
if (CI.capturesThis()) {
|
|
|
|
CurBasePointers.push_back(CV);
|
|
|
|
CurPointers.push_back(CV);
|
|
|
|
const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
|
|
|
|
CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
|
|
|
|
// Default map type.
|
2016-07-28 22:23:26 +08:00
|
|
|
CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
|
2016-05-27 00:53:38 +08:00
|
|
|
} else if (CI.capturesVariableByCopy()) {
|
2016-06-17 02:39:34 +08:00
|
|
|
CurBasePointers.push_back(CV);
|
|
|
|
CurPointers.push_back(CV);
|
2016-05-27 00:53:38 +08:00
|
|
|
if (!RI.getType()->isAnyPointerType()) {
|
2016-06-17 02:39:34 +08:00
|
|
|
// We have to signal to the runtime captures passed by value that are
|
|
|
|
// not pointers.
|
2016-07-28 22:23:26 +08:00
|
|
|
CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL);
|
2016-05-27 00:53:38 +08:00
|
|
|
CurSizes.push_back(CGF.getTypeSize(RI.getType()));
|
|
|
|
} else {
|
|
|
|
// Pointers are implicitly mapped with a zero size and no flags
|
|
|
|
// (other than first map that is added for all implicit maps).
|
|
|
|
CurMapTypes.push_back(0u);
|
|
|
|
CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
assert(CI.capturesVariable() && "Expected captured reference.");
|
|
|
|
CurBasePointers.push_back(CV);
|
|
|
|
CurPointers.push_back(CV);
|
|
|
|
|
|
|
|
const ReferenceType *PtrTy =
|
|
|
|
cast<ReferenceType>(RI.getType().getTypePtr());
|
|
|
|
QualType ElementType = PtrTy->getPointeeType();
|
|
|
|
CurSizes.push_back(CGF.getTypeSize(ElementType));
|
|
|
|
// The default map type for a scalar/complex type is 'to' because by
|
|
|
|
// default the value doesn't have to be retrieved. For an aggregate
|
|
|
|
// type, the default is 'tofrom'.
|
|
|
|
CurMapTypes.push_back(ElementType->isAggregateType()
|
2016-07-28 22:23:26 +08:00
|
|
|
? (OMP_MAP_TO | OMP_MAP_FROM)
|
|
|
|
: OMP_MAP_TO);
|
2016-05-27 00:53:38 +08:00
|
|
|
|
|
|
|
// If we have a capture by reference we may need to add the private
|
|
|
|
// pointer flag if the base declaration shows in some first-private
|
|
|
|
// clause.
|
|
|
|
CurMapTypes.back() =
|
|
|
|
adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
|
|
|
|
}
|
|
|
|
// Every default map produces a single argument, so, it is always the
|
|
|
|
// first one.
|
2016-07-28 22:23:26 +08:00
|
|
|
CurMapTypes.back() |= OMP_MAP_FIRST_REF;
|
2016-05-27 00:53:38 +08:00
|
|
|
}
|
2016-04-28 06:40:57 +08:00
|
|
|
};
|
2016-04-28 06:58:19 +08:00
|
|
|
|
|
|
|
/// \brief Device IDs with a reserved meaning for the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief No device was specified; the runtime is expected to pick one
  /// based on the environment variables described in the spec.
  OMP_DEVICEID_UNDEF = -1
};
|
|
|
|
} // anonymous namespace
|
|
|
|
|
|
|
|
/// \brief Emit the arrays used to pass the captures and map information to the
|
|
|
|
/// offloading runtime library. If there is no map or capture information,
|
|
|
|
/// return nullptr by reference.
|
|
|
|
static void
|
2016-07-28 22:23:26 +08:00
|
|
|
emitOffloadingArrays(CodeGenFunction &CGF,
|
|
|
|
MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
|
2016-04-28 06:58:19 +08:00
|
|
|
MappableExprsHandler::MapValuesArrayTy &Pointers,
|
|
|
|
MappableExprsHandler::MapValuesArrayTy &Sizes,
|
2016-07-28 22:23:26 +08:00
|
|
|
MappableExprsHandler::MapFlagsArrayTy &MapTypes,
|
|
|
|
CGOpenMPRuntime::TargetDataInfo &Info) {
|
2016-04-28 06:58:19 +08:00
|
|
|
auto &CGM = CGF.CGM;
|
|
|
|
auto &Ctx = CGF.getContext();
|
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
// Reset the array information.
|
|
|
|
Info.clearArrayInfo();
|
|
|
|
Info.NumberOfPtrs = BasePointers.size();
|
2016-04-28 06:58:19 +08:00
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
if (Info.NumberOfPtrs) {
|
2016-04-28 06:58:19 +08:00
|
|
|
// Detect if we have any capture size requiring runtime evaluation of the
|
|
|
|
// size so that a constant array could be eventually used.
|
|
|
|
bool hasRuntimeEvaluationCaptureSize = false;
|
|
|
|
for (auto *S : Sizes)
|
|
|
|
if (!isa<llvm::Constant>(S)) {
|
|
|
|
hasRuntimeEvaluationCaptureSize = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
|
2016-04-28 06:58:19 +08:00
|
|
|
QualType PointerArrayType =
|
|
|
|
Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
|
|
|
|
/*IndexTypeQuals=*/0);
|
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
Info.BasePointersArray =
|
2016-04-28 06:58:19 +08:00
|
|
|
CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
|
2016-07-28 22:23:26 +08:00
|
|
|
Info.PointersArray =
|
2016-04-28 06:58:19 +08:00
|
|
|
CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
|
|
|
|
|
|
|
|
// If we don't have any VLA types or other types that require runtime
|
|
|
|
// evaluation, we can use a constant array for the map sizes, otherwise we
|
|
|
|
// need to fill up the arrays as we do for the pointers.
|
|
|
|
if (hasRuntimeEvaluationCaptureSize) {
|
|
|
|
QualType SizeArrayType = Ctx.getConstantArrayType(
|
|
|
|
Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
|
|
|
|
/*IndexTypeQuals=*/0);
|
2016-07-28 22:23:26 +08:00
|
|
|
Info.SizesArray =
|
2016-04-28 06:58:19 +08:00
|
|
|
CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
|
|
|
|
} else {
|
|
|
|
// We expect all the sizes to be constant, so we collect them to create
|
|
|
|
// a constant array.
|
|
|
|
SmallVector<llvm::Constant *, 16> ConstSizes;
|
|
|
|
for (auto S : Sizes)
|
|
|
|
ConstSizes.push_back(cast<llvm::Constant>(S));
|
|
|
|
|
|
|
|
auto *SizesArrayInit = llvm::ConstantArray::get(
|
|
|
|
llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
|
|
|
|
auto *SizesArrayGbl = new llvm::GlobalVariable(
|
|
|
|
CGM.getModule(), SizesArrayInit->getType(),
|
|
|
|
/*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
|
|
|
|
SizesArrayInit, ".offload_sizes");
|
2016-06-15 05:02:05 +08:00
|
|
|
SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
|
2016-07-28 22:23:26 +08:00
|
|
|
Info.SizesArray = SizesArrayGbl;
|
2016-04-28 06:58:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// The map types are always constant so we don't need to generate code to
|
|
|
|
// fill arrays. Instead, we create an array constant.
|
|
|
|
llvm::Constant *MapTypesArrayInit =
|
|
|
|
llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
|
|
|
|
auto *MapTypesArrayGbl = new llvm::GlobalVariable(
|
|
|
|
CGM.getModule(), MapTypesArrayInit->getType(),
|
|
|
|
/*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
|
|
|
|
MapTypesArrayInit, ".offload_maptypes");
|
2016-06-15 05:02:05 +08:00
|
|
|
MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
|
2016-07-28 22:23:26 +08:00
|
|
|
Info.MapTypesArray = MapTypesArrayGbl;
|
2016-04-28 06:58:19 +08:00
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
|
|
|
|
llvm::Value *BPVal = *BasePointers[i];
|
2016-04-28 06:58:19 +08:00
|
|
|
llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-07-28 22:23:26 +08:00
|
|
|
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
|
|
|
|
Info.BasePointersArray, 0, i);
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if the some of the parameters are captured by value, this
argument is converted to uintptr_t type and thus we loosing the debug
info about real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a speciall external wrapper
function is generated, that calls the outlined function with the correct
parameters types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
|
2016-04-28 06:58:19 +08:00
|
|
|
Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
|
|
|
|
CGF.Builder.CreateStore(BPVal, BPAddr);
|
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
if (Info.requiresDevicePointerInfo())
|
|
|
|
if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
|
|
|
|
Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
|
|
|
|
|
2016-04-28 06:58:19 +08:00
|
|
|
llvm::Value *PVal = Pointers[i];
|
|
|
|
llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-07-28 22:23:26 +08:00
|
|
|
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
|
|
|
|
Info.PointersArray, 0, i);
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if the some of the parameters are captured by value, this
argument is converted to uintptr_t type and thus we loosing the debug
info about real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a speciall external wrapper
function is generated, that calls the outlined function with the correct
parameters types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
|
2016-04-28 06:58:19 +08:00
|
|
|
Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
|
|
|
|
CGF.Builder.CreateStore(PVal, PAddr);
|
|
|
|
|
|
|
|
if (hasRuntimeEvaluationCaptureSize) {
|
|
|
|
llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-07-28 22:23:26 +08:00
|
|
|
llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
|
|
|
|
Info.SizesArray,
|
2016-04-28 06:58:19 +08:00
|
|
|
/*Idx0=*/0,
|
|
|
|
/*Idx1=*/i);
|
|
|
|
Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
|
|
|
|
CGF.Builder.CreateStore(
|
|
|
|
CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
|
|
|
|
SAddr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/// \brief Emit the arguments to be passed to the runtime library based on the
|
|
|
|
/// arrays of pointers, sizes and map types.
|
|
|
|
static void emitOffloadingArraysArgument(
|
|
|
|
CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
|
|
|
|
llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
|
2016-07-28 22:23:26 +08:00
|
|
|
llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
|
2016-04-28 06:58:19 +08:00
|
|
|
auto &CGM = CGF.CGM;
|
2016-07-28 22:23:26 +08:00
|
|
|
if (Info.NumberOfPtrs) {
|
2016-04-28 06:58:19 +08:00
|
|
|
BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-07-28 22:23:26 +08:00
|
|
|
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
|
|
|
|
Info.BasePointersArray,
|
2016-04-28 06:58:19 +08:00
|
|
|
/*Idx0=*/0, /*Idx1=*/0);
|
|
|
|
PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-07-28 22:23:26 +08:00
|
|
|
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
|
|
|
|
Info.PointersArray,
|
2016-04-28 06:58:19 +08:00
|
|
|
/*Idx0=*/0,
|
|
|
|
/*Idx1=*/0);
|
|
|
|
SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-07-28 22:23:26 +08:00
|
|
|
llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
|
2016-04-28 06:58:19 +08:00
|
|
|
/*Idx0=*/0, /*Idx1=*/0);
|
|
|
|
MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
|
2016-07-28 22:23:26 +08:00
|
|
|
llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs),
|
|
|
|
Info.MapTypesArray,
|
2016-04-28 06:58:19 +08:00
|
|
|
/*Idx0=*/0,
|
|
|
|
/*Idx1=*/0);
|
|
|
|
} else {
|
|
|
|
BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
|
|
|
|
PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
|
|
|
|
SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
|
|
|
|
MapTypesArrayArg =
|
|
|
|
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
|
|
|
|
}
|
2016-04-28 06:40:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
|
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
llvm::Value *OutlinedFn,
|
|
|
|
llvm::Value *OutlinedFnID,
|
|
|
|
const Expr *IfCond, const Expr *Device,
|
|
|
|
ArrayRef<llvm::Value *> CapturedVars) {
|
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
assert(OutlinedFn && "Invalid outlined function!");
|
|
|
|
|
2015-12-03 01:44:43 +08:00
|
|
|
auto &Ctx = CGF.getContext();
|
|
|
|
|
2016-04-28 06:40:57 +08:00
|
|
|
// Fill up the arrays with all the captured variables.
|
|
|
|
MappableExprsHandler::MapValuesArrayTy KernelArgs;
|
2016-07-28 22:23:26 +08:00
|
|
|
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
|
2016-04-28 06:40:57 +08:00
|
|
|
MappableExprsHandler::MapValuesArrayTy Pointers;
|
|
|
|
MappableExprsHandler::MapValuesArrayTy Sizes;
|
|
|
|
MappableExprsHandler::MapFlagsArrayTy MapTypes;
|
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
|
2016-04-28 06:40:57 +08:00
|
|
|
MappableExprsHandler::MapValuesArrayTy CurPointers;
|
|
|
|
MappableExprsHandler::MapValuesArrayTy CurSizes;
|
|
|
|
MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
|
2015-10-03 00:14:20 +08:00
|
|
|
|
2016-05-27 00:53:38 +08:00
|
|
|
// Get mappable expression information.
|
|
|
|
MappableExprsHandler MEHandler(D, CGF);
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
|
|
|
|
auto RI = CS.getCapturedRecordDecl()->field_begin();
|
|
|
|
auto CV = CapturedVars.begin();
|
|
|
|
for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
|
|
|
|
CE = CS.capture_end();
|
|
|
|
CI != CE; ++CI, ++RI, ++CV) {
|
|
|
|
StringRef Name;
|
|
|
|
QualType Ty;
|
|
|
|
|
2016-04-28 06:40:57 +08:00
|
|
|
CurBasePointers.clear();
|
|
|
|
CurPointers.clear();
|
|
|
|
CurSizes.clear();
|
|
|
|
CurMapTypes.clear();
|
|
|
|
|
|
|
|
// VLA sizes are passed to the outlined region by copy and do not have map
|
|
|
|
// information associated.
|
2015-10-03 00:14:20 +08:00
|
|
|
if (CI->capturesVariableArrayType()) {
|
2016-04-28 06:40:57 +08:00
|
|
|
CurBasePointers.push_back(*CV);
|
|
|
|
CurPointers.push_back(*CV);
|
|
|
|
CurSizes.push_back(CGF.getTypeSize(RI->getType()));
|
2015-12-03 01:44:43 +08:00
|
|
|
// Copy to the device as an argument. No need to retrieve it.
|
2016-05-27 00:48:10 +08:00
|
|
|
CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
|
|
|
|
MappableExprsHandler::OMP_MAP_FIRST_REF);
|
2015-10-03 00:14:20 +08:00
|
|
|
} else {
|
2016-04-28 06:40:57 +08:00
|
|
|
// If we have any information in the map clause, we use it, otherwise we
|
|
|
|
// just do a default mapping.
|
2016-07-28 22:25:09 +08:00
|
|
|
MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
|
2016-04-28 06:40:57 +08:00
|
|
|
CurSizes, CurMapTypes);
|
2016-05-27 00:53:38 +08:00
|
|
|
if (CurBasePointers.empty())
|
|
|
|
MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
|
|
|
|
CurPointers, CurSizes, CurMapTypes);
|
2016-04-28 06:40:57 +08:00
|
|
|
}
|
|
|
|
// We expect to have at least an element of information for this capture.
|
|
|
|
assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
|
|
|
|
assert(CurBasePointers.size() == CurPointers.size() &&
|
|
|
|
CurBasePointers.size() == CurSizes.size() &&
|
|
|
|
CurBasePointers.size() == CurMapTypes.size() &&
|
|
|
|
"Inconsistent map information sizes!");
|
|
|
|
|
|
|
|
// The kernel args are always the first elements of the base pointers
|
|
|
|
// associated with a capture.
|
2016-07-28 22:23:26 +08:00
|
|
|
KernelArgs.push_back(*CurBasePointers.front());
|
2016-04-28 06:40:57 +08:00
|
|
|
// We need to append the results of this capture to what we already have.
|
|
|
|
BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
|
|
|
|
Pointers.append(CurPointers.begin(), CurPointers.end());
|
|
|
|
Sizes.append(CurSizes.begin(), CurSizes.end());
|
|
|
|
MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
|
|
|
|
}
|
|
|
|
|
2015-10-03 00:14:20 +08:00
|
|
|
// Keep track on whether the host function has to be executed.
|
|
|
|
auto OffloadErrorQType =
|
2015-12-03 01:44:43 +08:00
|
|
|
Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
|
2015-10-03 00:14:20 +08:00
|
|
|
auto OffloadError = CGF.MakeAddrLValue(
|
|
|
|
CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
|
|
|
|
OffloadErrorQType);
|
|
|
|
CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
|
|
|
|
OffloadError);
|
|
|
|
|
|
|
|
// Fill up the pointer arrays and transfer execution to the device.
|
2017-01-14 02:55:32 +08:00
|
|
|
auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device,
|
|
|
|
OutlinedFnID, OffloadError,
|
2016-03-29 13:34:15 +08:00
|
|
|
&D](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
auto &RT = CGF.CGM.getOpenMPRuntime();
|
2016-04-28 06:58:19 +08:00
|
|
|
// Emit the offloading arrays.
|
2016-07-28 22:23:26 +08:00
|
|
|
TargetDataInfo Info;
|
|
|
|
emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
|
|
|
|
emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
|
|
|
|
Info.PointersArray, Info.SizesArray,
|
|
|
|
Info.MapTypesArray, Info);
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
// On top of the arrays that were filled up, the target offloading call
|
|
|
|
// takes as arguments the device id as well as the host pointer. The host
|
|
|
|
// pointer is used by the runtime library to identify the current target
|
|
|
|
// region, so it only has to be unique and not necessarily point to
|
|
|
|
// anything. It could be the pointer to the outlined function that
|
|
|
|
// implements the target region, but we aren't using that so that the
|
|
|
|
// compiler doesn't need to keep that, and could therefore inline the host
|
|
|
|
// function if proven worthwhile during optimization.
|
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
// From this point on, we need to have an ID of the target region defined.
|
|
|
|
assert(OutlinedFnID && "Invalid outlined function ID!");
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
// Emit device ID if any.
|
|
|
|
llvm::Value *DeviceID;
|
|
|
|
if (Device)
|
|
|
|
DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
|
2016-03-29 13:34:15 +08:00
|
|
|
CGF.Int32Ty, /*isSigned=*/true);
|
2015-10-03 00:14:20 +08:00
|
|
|
else
|
|
|
|
DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
|
|
|
|
|
2016-04-28 06:58:19 +08:00
|
|
|
// Emit the number of elements in the offloading arrays.
|
|
|
|
llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
|
|
|
|
|
2016-03-04 00:20:23 +08:00
|
|
|
// Return value of the runtime offloading call.
|
|
|
|
llvm::Value *Return;
|
|
|
|
|
2017-01-25 08:57:16 +08:00
|
|
|
auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
|
|
|
|
auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
|
2016-03-04 00:20:23 +08:00
|
|
|
|
2017-01-25 08:57:16 +08:00
|
|
|
// The target region is an outlined function launched by the runtime
|
|
|
|
// via calls __tgt_target() or __tgt_target_teams().
|
|
|
|
//
|
|
|
|
// __tgt_target() launches a target region with one team and one thread,
|
|
|
|
// executing a serial region. This master thread may in turn launch
|
|
|
|
// more threads within its team upon encountering a parallel region,
|
|
|
|
// however, no additional teams can be launched on the device.
|
|
|
|
//
|
|
|
|
// __tgt_target_teams() launches a target region with one or more teams,
|
|
|
|
// each with one or more threads. This call is required for target
|
|
|
|
// constructs such as:
|
|
|
|
// 'target teams'
|
|
|
|
// 'target' / 'teams'
|
|
|
|
// 'target teams distribute parallel for'
|
|
|
|
// 'target parallel'
|
|
|
|
// and so on.
|
|
|
|
//
|
|
|
|
// Note that on the host and CPU targets, the runtime implementation of
|
|
|
|
// these calls simply call the outlined function without forking threads.
|
|
|
|
// The outlined functions themselves have runtime calls to
|
|
|
|
// __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
|
|
|
|
// the compiler in emitTeamsCall() and emitParallelCall().
|
|
|
|
//
|
|
|
|
// In contrast, on the NVPTX target, the implementation of
|
|
|
|
// __tgt_target_teams() launches a GPU kernel with the requested number
|
|
|
|
// of teams and threads so no additional calls to the runtime are required.
|
2016-03-04 00:20:23 +08:00
|
|
|
if (NumTeams) {
|
2017-01-25 08:57:16 +08:00
|
|
|
// If we have NumTeams defined this means that we have an enclosed teams
|
|
|
|
// region. Therefore we also expect to have NumThreads defined. These two
|
|
|
|
// values should be defined in the presence of a teams directive,
|
|
|
|
// regardless of having any clauses associated. If the user is using teams
|
|
|
|
// but no clauses, these two values will be the default that should be
|
|
|
|
// passed to the runtime library - a 32-bit integer with the value zero.
|
|
|
|
assert(NumThreads && "Thread limit expression should be available along "
|
|
|
|
"with number of teams.");
|
2016-03-04 00:20:23 +08:00
|
|
|
llvm::Value *OffloadingArgs[] = {
|
2016-07-28 22:23:26 +08:00
|
|
|
DeviceID, OutlinedFnID,
|
|
|
|
PointerNum, Info.BasePointersArray,
|
|
|
|
Info.PointersArray, Info.SizesArray,
|
|
|
|
Info.MapTypesArray, NumTeams,
|
2017-01-25 08:57:16 +08:00
|
|
|
NumThreads};
|
2016-03-04 00:20:23 +08:00
|
|
|
Return = CGF.EmitRuntimeCall(
|
2016-03-29 13:34:15 +08:00
|
|
|
RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
|
2016-03-04 00:20:23 +08:00
|
|
|
} else {
|
|
|
|
llvm::Value *OffloadingArgs[] = {
|
2016-07-28 22:23:26 +08:00
|
|
|
DeviceID, OutlinedFnID,
|
|
|
|
PointerNum, Info.BasePointersArray,
|
|
|
|
Info.PointersArray, Info.SizesArray,
|
|
|
|
Info.MapTypesArray};
|
2016-03-29 13:34:15 +08:00
|
|
|
Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
|
2016-03-04 00:20:23 +08:00
|
|
|
OffloadingArgs);
|
|
|
|
}
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
CGF.EmitStoreOfScalar(Return, OffloadError);
|
|
|
|
};
|
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
// Notify that the host version must be executed.
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
|
2016-01-06 21:42:12 +08:00
|
|
|
OffloadError);
|
|
|
|
};
|
|
|
|
|
|
|
|
// If we have a target function ID it means that we need to support
|
|
|
|
// offloading, otherwise, just execute on the host. We need to execute on host
|
|
|
|
// regardless of the conditional in the if clause if, e.g., the user do not
|
|
|
|
// specify target triples.
|
|
|
|
if (OutlinedFnID) {
|
2016-03-29 13:34:15 +08:00
|
|
|
if (IfCond)
|
2016-01-06 21:42:12 +08:00
|
|
|
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
|
2016-03-29 13:34:15 +08:00
|
|
|
else {
|
|
|
|
RegionCodeGenTy ThenRCG(ThenGen);
|
|
|
|
ThenRCG(CGF);
|
2016-03-28 20:58:34 +08:00
|
|
|
}
|
|
|
|
} else {
|
2016-03-29 13:34:15 +08:00
|
|
|
RegionCodeGenTy ElseRCG(ElseGen);
|
|
|
|
ElseRCG(CGF);
|
2016-03-28 20:58:34 +08:00
|
|
|
}
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
// Check the error code and execute the host version if required.
|
|
|
|
auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
|
|
|
|
auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
|
|
|
|
auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
|
|
|
|
auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
|
|
|
|
CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
|
|
|
|
|
|
|
|
CGF.EmitBlock(OffloadFailedBlock);
|
2016-04-28 06:40:57 +08:00
|
|
|
CGF.Builder.CreateCall(OutlinedFn, KernelArgs);
|
2015-10-03 00:14:20 +08:00
|
|
|
CGF.EmitBranch(OffloadContBlock);
|
|
|
|
|
|
|
|
CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
|
|
|
|
}
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
|
|
|
|
StringRef ParentName) {
|
|
|
|
if (!S)
|
|
|
|
return;
|
|
|
|
|
2017-01-16 23:26:02 +08:00
|
|
|
// Codegen OMP target directives that offload compute to the device.
|
|
|
|
bool requiresDeviceCodegen =
|
|
|
|
isa<OMPExecutableDirective>(S) &&
|
|
|
|
isOpenMPTargetExecutionDirective(
|
|
|
|
cast<OMPExecutableDirective>(S)->getDirectiveKind());
|
|
|
|
|
|
|
|
if (requiresDeviceCodegen) {
|
|
|
|
auto &E = *cast<OMPExecutableDirective>(S);
|
2016-01-06 21:42:12 +08:00
|
|
|
unsigned DeviceID;
|
|
|
|
unsigned FileID;
|
|
|
|
unsigned Line;
|
2017-01-16 23:26:02 +08:00
|
|
|
getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
FileID, Line);
|
2016-01-06 21:42:12 +08:00
|
|
|
|
|
|
|
// Is this a target region that should not be emitted as an entry point? If
|
|
|
|
// so just signal we are done with this target region.
|
[OpenMP] Rename the offload entry points.
Summary:
Unlike other outlined regions in OpenMP, offloading entry points have to have be visible (external linkage) for the device side. Using dots in the names of the entries can be therefore problematic for some toolchains, e.g. NVPTX.
Also the patch drops the column information in the unique name of the entry points. The parsing of directives ignore unknown tokens, preventing several target regions to be implemented in the same line. Therefore, the line information is sufficient for the name to be unique. Also, the preprocessor printer does not preserve the column information, causing offloading-entry detection issues if the host uses an integrated preprocessor and the target doesn't (or vice versa).
Reviewers: hfinkel, arpith-jacob, carlo.bertolli, kkwli0, ABataev
Subscribers: cfe-commits, fraggamuffin, caomhin
Differential Revision: http://reviews.llvm.org/D17179
llvm-svn: 260837
2016-02-14 07:35:10 +08:00
|
|
|
if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
|
|
|
|
ParentName, Line))
|
2016-01-06 21:42:12 +08:00
|
|
|
return;
|
|
|
|
|
2017-01-16 23:26:02 +08:00
|
|
|
switch (S->getStmtClass()) {
|
|
|
|
case Stmt::OMPTargetDirectiveClass:
|
|
|
|
CodeGenFunction::EmitOMPTargetDeviceFunction(
|
|
|
|
CGM, ParentName, cast<OMPTargetDirective>(*S));
|
|
|
|
break;
|
2017-01-19 02:18:53 +08:00
|
|
|
case Stmt::OMPTargetParallelDirectiveClass:
|
|
|
|
CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
|
|
|
|
CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
|
|
|
|
break;
|
2017-01-25 10:18:43 +08:00
|
|
|
case Stmt::OMPTargetTeamsDirectiveClass:
|
|
|
|
CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
|
|
|
|
CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
|
|
|
|
break;
|
2017-01-16 23:26:02 +08:00
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
|
|
|
|
}
|
2016-01-06 21:42:12 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
|
2016-05-08 14:43:56 +08:00
|
|
|
if (!E->hasAssociatedStmt())
|
2016-01-06 21:42:12 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
scanForTargetRegionsFunctions(
|
|
|
|
cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
|
|
|
|
ParentName);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If this is a lambda function, look into its body.
|
|
|
|
if (auto *L = dyn_cast<LambdaExpr>(S))
|
|
|
|
S = L->getBody();
|
|
|
|
|
|
|
|
// Keep looking for target regions recursively.
|
|
|
|
for (auto *II : S->children())
|
|
|
|
scanForTargetRegionsFunctions(II, ParentName);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Handle the function \a GD when compiling for a device: only the
/// target regions found in its body are emitted. Returns false on the host,
/// where regular code generation applies, and true when the function has
/// been completely dealt with here.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  const auto *Fn = cast<FunctionDecl>(GD.getDecl());

  // On the host the function goes through the normal code generation path.
  const bool IsDeviceCompilation = CGM.getLangOpts().OpenMPIsDevice;
  if (!IsDeviceCompilation)
    return false;

  // Try to detect target regions in the function.
  scanForTargetRegionsFunctions(Fn->getBody(), CGM.getMangledName(GD));

  // No function other than the ones created during the scanning should be
  // emitted, so report this one as fully processed.
  return true;
}
|
|
|
|
|
|
|
|
/// Handles the global variable \p GD when compiling for an OpenMP device.
///
/// If the (base element) type of the variable is a C++ class, the bodies of
/// all its constructors and of its destructor are scanned for target regions.
///
/// \returns false when compiling for the host; true otherwise, signalling
/// that \p GD was processed here (no globals are emitted in target mode
/// because 'declare target' is not implemented yet).
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  const auto *Var = cast<VarDecl>(GD.getDecl());
  auto *Record =
      Var->getType()->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
  if (!Record)
    return true;

  // The complete-object variants are used to produce the kernel name
  // mangling.
  for (auto *Ctor : Record->ctors()) {
    StringRef CtorName = CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
    scanForTargetRegionsFunctions(Ctor->getBody(), CtorName);
  }
  if (auto *Dtor = Record->getDestructor()) {
    StringRef DtorName = CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
    scanForTargetRegionsFunctions(Dtor->getBody(), DtorName);
  }

  return true;
}
|
|
|
|
|
|
|
|
/// Dispatches \p GD to emitTargetFunctions() when it declares a function and
/// to emitTargetGlobalVariable() otherwise, returning that callee's result.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  const bool IsFunction = isa<FunctionDecl>(GD.getDecl());
  return IsFunction ? emitTargetFunctions(GD) : emitTargetGlobalVariable(GD);
}
|
|
|
|
|
|
|
|
/// Emits the offload entries and returns the function that registers the
/// offloading binary descriptor.
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // Step 1: if the current module has offloading, its entries and the
  // associated metadata have to be emitted now.
  createOffloadEntriesAndInfoMetadata();

  // Step 2: create and register the offloading binary descriptor, the main
  // entity capturing all offloading information of this compilation unit.
  auto *RegistrationFn = createOffloadingBinaryDescriptorRegistration();
  return RegistrationFn;
}
|
2016-03-04 04:34:23 +08:00
|
|
|
|
|
|
|
/// Emits the call
///   __kmpc_fork_teams(loc, n, microtask, var1, .., varn)
/// where n is the number of entries in \p CapturedVars and microtask is
/// \p OutlinedFn cast to the kmpc micro-task pointer type.
///
/// Nothing is emitted when \p CGF has no insertion point. \p D is not used.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Value *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *LocVal = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Fixed leading arguments first, then one argument per captured variable.
  llvm::SmallVector<llvm::Value *, 16> ForkArgs;
  ForkArgs.push_back(LocVal);
  // Number of captured vars.
  ForkArgs.push_back(CGF.Builder.getInt32(CapturedVars.size()));
  ForkArgs.push_back(
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()));
  ForkArgs.append(CapturedVars.begin(), CapturedVars.end());

  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_fork_teams),
                      ForkArgs);
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
|
2016-04-04 23:55:02 +08:00
|
|
|
const Expr *NumTeams,
|
|
|
|
const Expr *ThreadLimit,
|
2016-03-04 04:34:23 +08:00
|
|
|
SourceLocation Loc) {
|
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
|
|
|
|
|
|
|
auto *RTLoc = emitUpdateLocation(CGF, Loc);
|
|
|
|
|
2016-04-04 23:55:02 +08:00
|
|
|
llvm::Value *NumTeamsVal =
|
|
|
|
(NumTeams)
|
|
|
|
? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
|
|
|
|
CGF.CGM.Int32Ty, /* isSigned = */ true)
|
|
|
|
: CGF.Builder.getInt32(0);
|
|
|
|
|
|
|
|
llvm::Value *ThreadLimitVal =
|
|
|
|
(ThreadLimit)
|
|
|
|
? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
|
|
|
|
CGF.CGM.Int32Ty, /* isSigned = */ true)
|
|
|
|
: CGF.Builder.getInt32(0);
|
|
|
|
|
2016-03-04 04:34:23 +08:00
|
|
|
// Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
|
2016-04-04 23:55:02 +08:00
|
|
|
llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
|
|
|
|
ThreadLimitVal};
|
2016-03-04 04:34:23 +08:00
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
|
|
|
|
PushNumTeamsArgs);
|
|
|
|
}
|
2016-04-28 06:58:19 +08:00
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
/// Emits the code for a target data region: the runtime call opening the data
/// environment, the region body, and the runtime call closing it.
///
/// \param D       Directive whose map clauses describe the data environment.
/// \param IfCond  Optional 'if' clause condition; when present both runtime
///                calls are emitted through emitOMPIfClause().
/// \param Device  Optional 'device' clause expression; when absent
///                OMP_DEVICEID_UNDEF is passed to the runtime.
/// \param CodeGen Generator for the region body.
/// \param Info    Filled in while opening the region and reused to close it.
///
/// When \p Info records captured device addresses the body is emitted inside
/// the opening sequence (and again, without privatization, in the 'else'
/// branch of the 'if' clause); otherwise it is emitted once between the two
/// runtime calls. Nothing is emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action installed in place of the default codegen action in order to turn
  // privatization off.
  PrePostActionTy NoPrivAction;

  // Common tail of the opening and the closing sequence: materialize the
  // offloading array arguments from Info, evaluate the device id and the
  // number of pointers, and call the requested runtime entry point.
  auto &&EmitDataRuntimeCall = [Device, &Info](CodeGenFunction &CGF,
                                               OpenMPRTLFunction RTLFnKind) {
    llvm::Value *BasePtrsArg = nullptr;
    llvm::Value *PtrsArg = nullptr;
    llvm::Value *SizesArg = nullptr;
    llvm::Value *MapTypesArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePtrsArg, PtrsArg, SizesArg,
                                 MapTypesArg, Info);

    // Device id, if any.
    llvm::Value *DeviceID =
        Device ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int32Ty, /*isSigned=*/true)
               : CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    // Number of elements in the offloading arrays.
    auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {DeviceID, PointerNum, BasePtrsArg,
                                     PtrsArg,  SizesArg,   MapTypesArg};
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFnKind), OffloadingArgs);
  };

  // Opening of the data environment. Everything needed to close the region
  // later is recorded in Info, which is captured by reference.
  auto &&BeginThenGen = [&D, &Info, &CodeGen, &EmitDataRuntimeCall](
                            CodeGenFunction &CGF, PrePostActionTy &) {
    // Collect the map clause information of all mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the offloading arrays, then open the data environment.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    EmitDataRuntimeCall(CGF, OMPRTL__tgt_target_data_begin);

    // With device pointer privatization the body has to be emitted here. It
    // ends up duplicated: once with and once without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Closing of the data environment.
  auto &&EndThenGen = [&Info, &EmitDataRuntimeCall](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");
    EmitDataRuntimeCall(CGF, OMPRTL__tgt_target_data_end);
  };

  // 'else' branch of the opening: only when device pointer privatization is
  // needed is the body emitted here, with privatization disabled.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (Info.CaptureDeviceAddrMap.empty())
      return;
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  };

  // Nothing has to be closed when the 'if' clause evaluates to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy BeginRCG(BeginThenGen);
    BeginRCG(CGF);
  }

  // Without device pointer privatization the body is emitted exactly once,
  // between the two runtime calls, which avoids duplicating its code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy EndRCG(EndThenGen);
    EndRCG(CGF);
  }
}
|
2016-04-28 07:07:29 +08:00
|
|
|
|
2016-05-27 02:30:22 +08:00
|
|
|
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
|
2016-04-28 07:14:30 +08:00
|
|
|
CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
|
|
|
|
const Expr *Device) {
|
2016-04-28 07:07:29 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
|
|
|
|
2016-04-28 07:14:30 +08:00
|
|
|
assert((isa<OMPTargetEnterDataDirective>(D) ||
|
2016-05-27 02:30:22 +08:00
|
|
|
isa<OMPTargetExitDataDirective>(D) ||
|
|
|
|
isa<OMPTargetUpdateDirective>(D)) &&
|
|
|
|
"Expecting either target enter, exit data, or update directives.");
|
2016-04-28 07:14:30 +08:00
|
|
|
|
2016-04-28 07:07:29 +08:00
|
|
|
// Generate the code for the opening of the data environment.
|
2017-01-14 02:55:32 +08:00
|
|
|
auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) {
|
2016-04-28 07:07:29 +08:00
|
|
|
// Fill up the arrays with all the mapped variables.
|
2016-07-28 22:23:26 +08:00
|
|
|
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
|
2016-04-28 07:07:29 +08:00
|
|
|
MappableExprsHandler::MapValuesArrayTy Pointers;
|
|
|
|
MappableExprsHandler::MapValuesArrayTy Sizes;
|
|
|
|
MappableExprsHandler::MapFlagsArrayTy MapTypes;
|
|
|
|
|
|
|
|
// Get map clause information.
|
2016-05-27 02:30:22 +08:00
|
|
|
MappableExprsHandler MEHandler(D, CGF);
|
|
|
|
MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
|
2016-04-28 07:07:29 +08:00
|
|
|
|
|
|
|
// Fill up the arrays and create the arguments.
|
2016-07-28 22:23:26 +08:00
|
|
|
TargetDataInfo Info;
|
|
|
|
emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
|
|
|
|
emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
|
|
|
|
Info.PointersArray, Info.SizesArray,
|
|
|
|
Info.MapTypesArray, Info);
|
2016-04-28 07:07:29 +08:00
|
|
|
|
|
|
|
// Emit device ID if any.
|
|
|
|
llvm::Value *DeviceID = nullptr;
|
|
|
|
if (Device)
|
|
|
|
DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
|
|
|
|
CGF.Int32Ty, /*isSigned=*/true);
|
|
|
|
else
|
|
|
|
DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
|
|
|
|
|
|
|
|
// Emit the number of elements in the offloading arrays.
|
|
|
|
auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
|
|
|
|
|
|
|
|
llvm::Value *OffloadingArgs[] = {
|
2016-07-28 22:23:26 +08:00
|
|
|
DeviceID, PointerNum, Info.BasePointersArray,
|
|
|
|
Info.PointersArray, Info.SizesArray, Info.MapTypesArray};
|
2016-05-27 02:30:22 +08:00
|
|
|
|
2016-04-28 07:07:29 +08:00
|
|
|
auto &RT = CGF.CGM.getOpenMPRuntime();
|
2016-05-27 02:30:22 +08:00
|
|
|
// Select the right runtime function call for each expected standalone
|
|
|
|
// directive.
|
|
|
|
OpenMPRTLFunction RTLFn;
|
|
|
|
switch (D.getDirectiveKind()) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unexpected standalone target data directive.");
|
|
|
|
break;
|
|
|
|
case OMPD_target_enter_data:
|
|
|
|
RTLFn = OMPRTL__tgt_target_data_begin;
|
|
|
|
break;
|
|
|
|
case OMPD_target_exit_data:
|
|
|
|
RTLFn = OMPRTL__tgt_target_data_end;
|
|
|
|
break;
|
|
|
|
case OMPD_target_update:
|
|
|
|
RTLFn = OMPRTL__tgt_target_data_update;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
|
2016-04-28 07:07:29 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
// In the event we get an if clause, we don't have to take any action on the
|
|
|
|
// else side.
|
|
|
|
auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
|
|
|
|
|
|
|
|
if (IfCond) {
|
|
|
|
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
|
|
|
|
} else {
|
|
|
|
RegionCodeGenTy ThenGenRCG(ThenGen);
|
|
|
|
ThenGenRCG(CGF);
|
|
|
|
}
|
|
|
|
}
|
2016-05-06 17:40:08 +08:00
|
|
|
|
|
|
|
namespace {
|
|
|
|
/// Kind of parameter in a function with 'declare simd' directive.
|
|
|
|
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
|
|
|
|
/// Attribute set of the parameter.
|
|
|
|
struct ParamAttrTy {
|
|
|
|
ParamKindTy Kind = Vector;
|
|
|
|
llvm::APSInt StrideOrArg;
|
|
|
|
llvm::APSInt Alignment;
|
|
|
|
};
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
static unsigned evaluateCDTSize(const FunctionDecl *FD,
|
|
|
|
ArrayRef<ParamAttrTy> ParamAttrs) {
|
|
|
|
// Every vector variant of a SIMD-enabled function has a vector length (VLEN).
|
|
|
|
// If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
|
|
|
|
// of that clause. The VLEN value must be power of 2.
|
|
|
|
// In other case the notion of the function`s "characteristic data type" (CDT)
|
|
|
|
// is used to compute the vector length.
|
|
|
|
// CDT is defined in the following order:
|
|
|
|
// a) For non-void function, the CDT is the return type.
|
|
|
|
// b) If the function has any non-uniform, non-linear parameters, then the
|
|
|
|
// CDT is the type of the first such parameter.
|
|
|
|
// c) If the CDT determined by a) or b) above is struct, union, or class
|
|
|
|
// type which is pass-by-value (except for the type that maps to the
|
|
|
|
// built-in complex data type), the characteristic data type is int.
|
|
|
|
// d) If none of the above three cases is applicable, the CDT is int.
|
|
|
|
// The VLEN is then determined based on the CDT and the size of vector
|
|
|
|
// register of that ISA for which current vector version is generated. The
|
|
|
|
// VLEN is computed using the formula below:
|
|
|
|
// VLEN = sizeof(vector_register) / sizeof(CDT),
|
|
|
|
// where vector register size specified in section 3.2.1 Registers and the
|
|
|
|
// Stack Frame of original AMD64 ABI document.
|
|
|
|
QualType RetType = FD->getReturnType();
|
|
|
|
if (RetType.isNull())
|
|
|
|
return 0;
|
|
|
|
ASTContext &C = FD->getASTContext();
|
|
|
|
QualType CDT;
|
|
|
|
if (!RetType.isNull() && !RetType->isVoidType())
|
|
|
|
CDT = RetType;
|
|
|
|
else {
|
|
|
|
unsigned Offset = 0;
|
|
|
|
if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
|
|
|
|
if (ParamAttrs[Offset].Kind == Vector)
|
|
|
|
CDT = C.getPointerType(C.getRecordType(MD->getParent()));
|
|
|
|
++Offset;
|
|
|
|
}
|
|
|
|
if (CDT.isNull()) {
|
|
|
|
for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
|
|
|
|
if (ParamAttrs[I + Offset].Kind == Vector) {
|
|
|
|
CDT = FD->getParamDecl(I)->getType();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (CDT.isNull())
|
|
|
|
CDT = C.IntTy;
|
|
|
|
CDT = CDT->getCanonicalTypeUnqualified();
|
|
|
|
if (CDT->isRecordType() || CDT->isUnionType())
|
|
|
|
CDT = C.IntTy;
|
|
|
|
return C.getTypeSize(CDT);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
|
2016-11-25 00:01:20 +08:00
|
|
|
const llvm::APSInt &VLENVal,
|
2016-05-06 17:40:08 +08:00
|
|
|
ArrayRef<ParamAttrTy> ParamAttrs,
|
|
|
|
OMPDeclareSimdDeclAttr::BranchStateTy State) {
|
|
|
|
struct ISADataTy {
|
|
|
|
char ISA;
|
|
|
|
unsigned VecRegSize;
|
|
|
|
};
|
|
|
|
ISADataTy ISAData[] = {
|
|
|
|
{
|
|
|
|
'b', 128
|
|
|
|
}, // SSE
|
|
|
|
{
|
|
|
|
'c', 256
|
|
|
|
}, // AVX
|
|
|
|
{
|
|
|
|
'd', 256
|
|
|
|
}, // AVX2
|
|
|
|
{
|
|
|
|
'e', 512
|
|
|
|
}, // AVX512
|
|
|
|
};
|
|
|
|
llvm::SmallVector<char, 2> Masked;
|
|
|
|
switch (State) {
|
|
|
|
case OMPDeclareSimdDeclAttr::BS_Undefined:
|
|
|
|
Masked.push_back('N');
|
|
|
|
Masked.push_back('M');
|
|
|
|
break;
|
|
|
|
case OMPDeclareSimdDeclAttr::BS_Notinbranch:
|
|
|
|
Masked.push_back('N');
|
|
|
|
break;
|
|
|
|
case OMPDeclareSimdDeclAttr::BS_Inbranch:
|
|
|
|
Masked.push_back('M');
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
for (auto Mask : Masked) {
|
|
|
|
for (auto &Data : ISAData) {
|
|
|
|
SmallString<256> Buffer;
|
|
|
|
llvm::raw_svector_ostream Out(Buffer);
|
|
|
|
Out << "_ZGV" << Data.ISA << Mask;
|
|
|
|
if (!VLENVal) {
|
|
|
|
Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
|
|
|
|
evaluateCDTSize(FD, ParamAttrs));
|
|
|
|
} else
|
|
|
|
Out << VLENVal;
|
|
|
|
for (auto &ParamAttr : ParamAttrs) {
|
|
|
|
switch (ParamAttr.Kind){
|
|
|
|
case LinearWithVarStride:
|
|
|
|
Out << 's' << ParamAttr.StrideOrArg;
|
|
|
|
break;
|
|
|
|
case Linear:
|
|
|
|
Out << 'l';
|
|
|
|
if (!!ParamAttr.StrideOrArg)
|
|
|
|
Out << ParamAttr.StrideOrArg;
|
|
|
|
break;
|
|
|
|
case Uniform:
|
|
|
|
Out << 'u';
|
|
|
|
break;
|
|
|
|
case Vector:
|
|
|
|
Out << 'v';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!!ParamAttr.Alignment)
|
|
|
|
Out << 'a' << ParamAttr.Alignment;
|
|
|
|
}
|
|
|
|
Out << '_' << Fn->getName();
|
|
|
|
Fn->addFnAttr(Out.str());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
|
|
|
|
llvm::Function *Fn) {
|
|
|
|
ASTContext &C = CGM.getContext();
|
|
|
|
FD = FD->getCanonicalDecl();
|
|
|
|
// Map params to their positions in function decl.
|
|
|
|
llvm::DenseMap<const Decl *, unsigned> ParamPositions;
|
|
|
|
if (isa<CXXMethodDecl>(FD))
|
|
|
|
ParamPositions.insert({FD, 0});
|
|
|
|
unsigned ParamPos = ParamPositions.size();
|
2016-06-24 12:05:48 +08:00
|
|
|
for (auto *P : FD->parameters()) {
|
2016-05-06 17:40:08 +08:00
|
|
|
ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
|
|
|
|
++ParamPos;
|
|
|
|
}
|
|
|
|
for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
|
|
|
|
llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
|
|
|
|
// Mark uniform parameters.
|
|
|
|
for (auto *E : Attr->uniforms()) {
|
|
|
|
E = E->IgnoreParenImpCasts();
|
|
|
|
unsigned Pos;
|
|
|
|
if (isa<CXXThisExpr>(E))
|
|
|
|
Pos = ParamPositions[FD];
|
|
|
|
else {
|
|
|
|
auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
|
|
|
|
->getCanonicalDecl();
|
|
|
|
Pos = ParamPositions[PVD];
|
|
|
|
}
|
|
|
|
ParamAttrs[Pos].Kind = Uniform;
|
|
|
|
}
|
|
|
|
// Get alignment info.
|
|
|
|
auto NI = Attr->alignments_begin();
|
|
|
|
for (auto *E : Attr->aligneds()) {
|
|
|
|
E = E->IgnoreParenImpCasts();
|
|
|
|
unsigned Pos;
|
|
|
|
QualType ParmTy;
|
|
|
|
if (isa<CXXThisExpr>(E)) {
|
|
|
|
Pos = ParamPositions[FD];
|
|
|
|
ParmTy = E->getType();
|
|
|
|
} else {
|
|
|
|
auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
|
|
|
|
->getCanonicalDecl();
|
|
|
|
Pos = ParamPositions[PVD];
|
|
|
|
ParmTy = PVD->getType();
|
|
|
|
}
|
|
|
|
ParamAttrs[Pos].Alignment =
|
|
|
|
(*NI) ? (*NI)->EvaluateKnownConstInt(C)
|
|
|
|
: llvm::APSInt::getUnsigned(
|
|
|
|
C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
|
|
|
|
.getQuantity());
|
|
|
|
++NI;
|
|
|
|
}
|
|
|
|
// Mark linear parameters.
|
|
|
|
auto SI = Attr->steps_begin();
|
|
|
|
auto MI = Attr->modifiers_begin();
|
|
|
|
for (auto *E : Attr->linears()) {
|
|
|
|
E = E->IgnoreParenImpCasts();
|
|
|
|
unsigned Pos;
|
|
|
|
if (isa<CXXThisExpr>(E))
|
|
|
|
Pos = ParamPositions[FD];
|
|
|
|
else {
|
|
|
|
auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
|
|
|
|
->getCanonicalDecl();
|
|
|
|
Pos = ParamPositions[PVD];
|
|
|
|
}
|
|
|
|
auto &ParamAttr = ParamAttrs[Pos];
|
|
|
|
ParamAttr.Kind = Linear;
|
|
|
|
if (*SI) {
|
|
|
|
if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
|
|
|
|
Expr::SE_AllowSideEffects)) {
|
|
|
|
if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
|
|
|
|
if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
|
|
|
|
ParamAttr.Kind = LinearWithVarStride;
|
|
|
|
ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
|
|
|
|
ParamPositions[StridePVD->getCanonicalDecl()]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
++SI;
|
|
|
|
++MI;
|
|
|
|
}
|
|
|
|
llvm::APSInt VLENVal;
|
|
|
|
if (const Expr *VLEN = Attr->getSimdlen())
|
|
|
|
VLENVal = VLEN->EvaluateKnownConstInt(C);
|
|
|
|
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
|
|
|
|
if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
|
|
|
|
CGM.getTriple().getArch() == llvm::Triple::x86_64)
|
|
|
|
emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
|
|
|
|
}
|
|
|
|
}
|
2016-05-25 20:36:08 +08:00
|
|
|
|
|
|
|
namespace {
|
|
|
|
/// Cleanup action for doacross support.
|
|
|
|
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
|
|
|
|
public:
|
|
|
|
static const int DoacrossFinArgs = 2;
|
|
|
|
|
|
|
|
private:
|
|
|
|
llvm::Value *RTLFn;
|
|
|
|
llvm::Value *Args[DoacrossFinArgs];
|
|
|
|
|
|
|
|
public:
|
|
|
|
DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
|
|
|
|
: RTLFn(RTLFn) {
|
|
|
|
assert(CallArgs.size() == DoacrossFinArgs);
|
|
|
|
std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
|
|
|
|
}
|
|
|
|
void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
|
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
|
|
|
CGF.EmitRuntimeCall(RTLFn, Args);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
/// Emits the initialization of doacross support for the loop directive \p D:
///   void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
///                             kmp_int32 num_dims, struct kmp_dim *dims);
/// with a single dimension, and pushes a cleanup that calls
/// __kmpc_doacross_fini(loc, gtid).
///
/// Nothing is emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &Ctx = CGM.getContext();
  QualType Int64Ty =
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);

  // The kmp_dim record type is built on first use and cached in KmpDimTy.
  RecordDecl *DimRD = nullptr;
  if (!KmpDimTy.isNull()) {
    DimRD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  } else {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    DimRD = Ctx.buildImplicitRecord("kmp_dim");
    DimRD->startDefinition();
    for (unsigned Field = 0; Field < 3; ++Field)
      addFieldToRecordDecl(Ctx, DimRD, Int64Ty);
    DimRD->completeDefinition();
    KmpDimTy = Ctx.getRecordType(DimRD);
  }

  // Zero-initialized temporary holding the single dimension.
  Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, KmpDimTy);

  // Indices of the fields inside kmp_dim.
  enum { LowerFD = 0, UpperFD, StrideFD };
  LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);

  // dims.upper = num_iterations;
  const Expr *NumIterExpr = D.getNumIterations();
  LValue UpperLVal = CGF.EmitLValueForField(
      DimsLVal, *std::next(DimRD->field_begin(), UpperFD));
  llvm::Value *NumIterVal = CGF.EmitScalarConversion(
      CGF.EmitScalarExpr(NumIterExpr), NumIterExpr->getType(), Int64Ty,
      NumIterExpr->getExprLoc());
  CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);

  // dims.stride = 1;
  LValue StrideLVal = CGF.EmitLValueForField(
      DimsLVal, *std::next(DimRD->field_begin(), StrideFD));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                        StrideLVal);

  // Arguments of __kmpc_doacross_init; num_dims is always 1 here.
  llvm::Value *InitLoc = emitUpdateLocation(CGF, D.getLocStart());
  llvm::Value *InitTid = getThreadID(CGF, D.getLocStart());
  llvm::Value *NumDims = llvm::ConstantInt::getSigned(CGM.Int32Ty, 1);
  llvm::Value *DimsPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DimsAddr.getPointer(), CGM.VoidPtrTy);
  llvm::Value *InitArgs[] = {InitLoc, InitTid, NumDims, DimsPtr};

  llvm::Value *InitRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(InitRTLFn, InitArgs);

  // Register the matching __kmpc_doacross_fini call as a cleanup for both
  // normal and exceptional exits.
  llvm::Value *FiniLoc = emitUpdateLocation(CGF, D.getLocEnd());
  llvm::Value *FiniTid = getThreadID(CGF, D.getLocEnd());
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {FiniLoc,
                                                               FiniTid};
  llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
|
|
|
|
|
|
|
|
/// Emits the runtime call for a doacross 'depend' clause \p C:
/// __kmpc_doacross_post for depend(source) and __kmpc_doacross_wait for
/// depend(sink). The counter value of the clause is converted to a signed
/// 64-bit integer, stored into a temporary, and passed by address after the
/// location and thread id arguments.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);

  // Materialize the counter value in memory as a 64-bit integer.
  const Expr *CounterVal = C->getCounterValue();
  assert(CounterVal);
  llvm::Value *CntVal = CGF.EmitScalarConversion(
      CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
      CounterVal->getExprLoc());
  Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
  CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);

  llvm::Value *LocVal = emitUpdateLocation(CGF, C->getLocStart());
  llvm::Value *ThreadID = getThreadID(CGF, C->getLocStart());
  llvm::Value *Args[] = {LocVal, ThreadID, CntAddr.getPointer()};

  // 'source' posts the iteration, anything else must be a 'sink' and waits.
  const bool IsSource = C->getDependencyKind() == OMPC_DEPEND_source;
  if (!IsSource)
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
  llvm::Value *RTLFn = createRuntimeFunction(
      IsSource ? OMPRTL__kmpc_doacross_post : OMPRTL__kmpc_doacross_wait);
  CGF.EmitRuntimeCall(RTLFn, Args);
}
|
|
|
|
|