[OpenMP] Codegen for use_device_ptr clause.

Summary: This patch adds support for the use_device_ptr clause. It includes changes in SEMA that could not be tested without codegen, namely, the use of the first private logic and mappable expressions support.

Reviewers: hfinkel, carlo.bertolli, arpith-jacob, kkwli0, ABataev

Subscribers: caomhin, cfe-commits

Differential Revision: https://reviews.llvm.org/D22691

llvm-svn: 276977
This commit is contained in:
Samuel Antao 2016-07-28 14:23:26 +00:00
parent 19459580af
commit cc10b85789
10 changed files with 1195 additions and 191 deletions

View File

@ -4228,50 +4228,153 @@ public:
/// 'use_device_ptr' with the variables 'a' and 'b'. /// 'use_device_ptr' with the variables 'a' and 'b'.
/// ///
class OMPUseDevicePtrClause final class OMPUseDevicePtrClause final
: public OMPVarListClause<OMPUseDevicePtrClause>, : public OMPMappableExprListClause<OMPUseDevicePtrClause>,
private llvm::TrailingObjects<OMPUseDevicePtrClause, Expr *> { private llvm::TrailingObjects<
OMPUseDevicePtrClause, Expr *, ValueDecl *, unsigned,
OMPClauseMappableExprCommon::MappableComponent> {
friend TrailingObjects; friend TrailingObjects;
friend OMPVarListClause; friend OMPVarListClause;
friend OMPMappableExprListClause;
friend class OMPClauseReader; friend class OMPClauseReader;
/// Build clause with number of variables \a N.
/// Define the sizes of each trailing object array except the last one. This
/// is required for TrailingObjects to work properly.
size_t numTrailingObjects(OverloadToken<Expr *>) const {
return 3 * varlist_size();
}
size_t numTrailingObjects(OverloadToken<ValueDecl *>) const {
return getUniqueDeclarationsNum();
}
size_t numTrailingObjects(OverloadToken<unsigned>) const {
return getUniqueDeclarationsNum() + getTotalComponentListNum();
}
/// Build clause with number of variables \a NumVars.
/// ///
/// \param StartLoc Starting location of the clause. /// \param StartLoc Starting location of the clause.
/// \param LParenLoc Location of '('.
/// \param EndLoc Ending location of the clause. /// \param EndLoc Ending location of the clause.
/// \param N Number of the variables in the clause. /// \param NumVars Number of expressions listed in this clause.
/// \param NumUniqueDeclarations Number of unique base declarations in this
/// clause.
/// \param NumComponentLists Number of component lists in this clause.
/// \param NumComponents Total number of expression components in the clause.
/// ///
OMPUseDevicePtrClause(SourceLocation StartLoc, SourceLocation LParenLoc, explicit OMPUseDevicePtrClause(SourceLocation StartLoc,
SourceLocation EndLoc, unsigned N) SourceLocation LParenLoc,
: OMPVarListClause<OMPUseDevicePtrClause>(OMPC_use_device_ptr, StartLoc, SourceLocation EndLoc, unsigned NumVars,
LParenLoc, EndLoc, N) {} unsigned NumUniqueDeclarations,
unsigned NumComponentLists,
unsigned NumComponents)
: OMPMappableExprListClause(OMPC_use_device_ptr, StartLoc, LParenLoc,
EndLoc, NumVars, NumUniqueDeclarations,
NumComponentLists, NumComponents) {}
/// \brief Build an empty clause. /// Build an empty clause.
/// ///
/// \param N Number of variables. /// \param NumVars Number of expressions listed in this clause.
/// \param NumUniqueDeclarations Number of unique base declarations in this
/// clause.
/// \param NumComponentLists Number of component lists in this clause.
/// \param NumComponents Total number of expression components in the clause.
/// ///
explicit OMPUseDevicePtrClause(unsigned N) explicit OMPUseDevicePtrClause(unsigned NumVars,
: OMPVarListClause<OMPUseDevicePtrClause>( unsigned NumUniqueDeclarations,
OMPC_use_device_ptr, SourceLocation(), SourceLocation(), unsigned NumComponentLists,
SourceLocation(), N) {} unsigned NumComponents)
: OMPMappableExprListClause(OMPC_use_device_ptr, SourceLocation(),
SourceLocation(), SourceLocation(), NumVars,
NumUniqueDeclarations, NumComponentLists,
NumComponents) {}
/// Sets the list of references to private copies with initializers for new
/// private variables.
/// \param VL List of references.
void setPrivateCopies(ArrayRef<Expr *> VL);
/// Gets the list of references to private copies with initializers for new
/// private variables.
MutableArrayRef<Expr *> getPrivateCopies() {
return MutableArrayRef<Expr *>(varlist_end(), varlist_size());
}
ArrayRef<const Expr *> getPrivateCopies() const {
return llvm::makeArrayRef(varlist_end(), varlist_size());
}
/// Sets the list of references to initializer variables for new private
/// variables.
/// \param VL List of references.
void setInits(ArrayRef<Expr *> VL);
/// Gets the list of references to initializer variables for new private
/// variables.
MutableArrayRef<Expr *> getInits() {
return MutableArrayRef<Expr *>(getPrivateCopies().end(), varlist_size());
}
ArrayRef<const Expr *> getInits() const {
return llvm::makeArrayRef(getPrivateCopies().end(), varlist_size());
}
public: public:
/// Creates clause with a list of variables \a VL. /// Creates clause with a list of variables \a Vars.
/// ///
/// \param C AST context. /// \param C AST context.
/// \param StartLoc Starting location of the clause. /// \param StartLoc Starting location of the clause.
/// \param LParenLoc Location of '('.
/// \param EndLoc Ending location of the clause. /// \param EndLoc Ending location of the clause.
/// \param VL List of references to the variables. /// \param Vars The original expression used in the clause.
/// \param PrivateVars Expressions referring to private copies.
/// \param Inits Expressions referring to private copy initializers.
/// \param Declarations Declarations used in the clause.
/// \param ComponentLists Component lists used in the clause.
/// ///
static OMPUseDevicePtrClause * static OMPUseDevicePtrClause *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
SourceLocation EndLoc, ArrayRef<Expr *> VL); SourceLocation EndLoc, ArrayRef<Expr *> Vars,
/// Creates an empty clause with the place for \a N variables. ArrayRef<Expr *> PrivateVars, ArrayRef<Expr *> Inits,
ArrayRef<ValueDecl *> Declarations,
MappableExprComponentListsRef ComponentLists);
/// Creates an empty clause with the place for \a NumVars variables.
/// ///
/// \param C AST context. /// \param C AST context.
/// \param N The number of variables. /// \param NumVars Number of expressions listed in the clause.
/// \param NumUniqueDeclarations Number of unique base declarations in this
/// clause.
/// \param NumComponentLists Number of unique base declarations in this
/// clause.
/// \param NumComponents Total number of expression components in the clause.
/// ///
static OMPUseDevicePtrClause *CreateEmpty(const ASTContext &C, unsigned N); static OMPUseDevicePtrClause *CreateEmpty(const ASTContext &C,
unsigned NumVars,
unsigned NumUniqueDeclarations,
unsigned NumComponentLists,
unsigned NumComponents);
typedef MutableArrayRef<Expr *>::iterator private_copies_iterator;
typedef ArrayRef<const Expr *>::iterator private_copies_const_iterator;
typedef llvm::iterator_range<private_copies_iterator> private_copies_range;
typedef llvm::iterator_range<private_copies_const_iterator>
private_copies_const_range;
private_copies_range private_copies() {
return private_copies_range(getPrivateCopies().begin(),
getPrivateCopies().end());
}
private_copies_const_range private_copies() const {
return private_copies_const_range(getPrivateCopies().begin(),
getPrivateCopies().end());
}
typedef MutableArrayRef<Expr *>::iterator inits_iterator;
typedef ArrayRef<const Expr *>::iterator inits_const_iterator;
typedef llvm::iterator_range<inits_iterator> inits_range;
typedef llvm::iterator_range<inits_const_iterator> inits_const_range;
inits_range inits() {
return inits_range(getInits().begin(), getInits().end());
}
inits_const_range inits() const {
return inits_const_range(getInits().begin(), getInits().end());
}
child_range children() { child_range children() {
return child_range(reinterpret_cast<Stmt **>(varlist_begin()), return child_range(reinterpret_cast<Stmt **>(varlist_begin()),

View File

@ -732,22 +732,66 @@ OMPFromClause *OMPFromClause::CreateEmpty(const ASTContext &C, unsigned NumVars,
NumComponentLists, NumComponents); NumComponentLists, NumComponents);
} }
OMPUseDevicePtrClause *OMPUseDevicePtrClause::Create(const ASTContext &C, void OMPUseDevicePtrClause::setPrivateCopies(ArrayRef<Expr *> VL) {
SourceLocation StartLoc, assert(VL.size() == varlist_size() &&
SourceLocation LParenLoc, "Number of private copies is not the same as the preallocated buffer");
SourceLocation EndLoc, std::copy(VL.begin(), VL.end(), varlist_end());
ArrayRef<Expr *> VL) { }
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size()));
OMPUseDevicePtrClause *Clause = void OMPUseDevicePtrClause::setInits(ArrayRef<Expr *> VL) {
new (Mem) OMPUseDevicePtrClause(StartLoc, LParenLoc, EndLoc, VL.size()); assert(VL.size() == varlist_size() &&
Clause->setVarRefs(VL); "Number of inits is not the same as the preallocated buffer");
std::copy(VL.begin(), VL.end(), getPrivateCopies().end());
}
OMPUseDevicePtrClause *OMPUseDevicePtrClause::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
SourceLocation EndLoc, ArrayRef<Expr *> Vars, ArrayRef<Expr *> PrivateVars,
ArrayRef<Expr *> Inits, ArrayRef<ValueDecl *> Declarations,
MappableExprComponentListsRef ComponentLists) {
unsigned NumVars = Vars.size();
unsigned NumUniqueDeclarations =
getUniqueDeclarationsTotalNumber(Declarations);
unsigned NumComponentLists = ComponentLists.size();
unsigned NumComponents = getComponentsTotalNumber(ComponentLists);
// We need to allocate:
// 3 x NumVars x Expr* - we have an original list expression for each clause
// list entry and an equal number of private copies and inits.
// NumUniqueDeclarations x ValueDecl* - unique base declarations associated
// with each component list.
// (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the
// number of lists for each unique declaration and the size of each component
// list.
// NumComponents x MappableComponent - the total of all the components in all
// the lists.
void *Mem = C.Allocate(
totalSizeToAlloc<Expr *, ValueDecl *, unsigned,
OMPClauseMappableExprCommon::MappableComponent>(
3 * NumVars, NumUniqueDeclarations,
NumUniqueDeclarations + NumComponentLists, NumComponents));
OMPUseDevicePtrClause *Clause = new (Mem) OMPUseDevicePtrClause(
StartLoc, LParenLoc, EndLoc, NumVars, NumUniqueDeclarations,
NumComponentLists, NumComponents);
Clause->setVarRefs(Vars);
Clause->setPrivateCopies(PrivateVars);
Clause->setInits(Inits);
Clause->setClauseInfo(Declarations, ComponentLists);
return Clause; return Clause;
} }
OMPUseDevicePtrClause *OMPUseDevicePtrClause::CreateEmpty(const ASTContext &C, OMPUseDevicePtrClause *OMPUseDevicePtrClause::CreateEmpty(
unsigned N) { const ASTContext &C, unsigned NumVars, unsigned NumUniqueDeclarations,
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(N)); unsigned NumComponentLists, unsigned NumComponents) {
return new (Mem) OMPUseDevicePtrClause(N); void *Mem = C.Allocate(
totalSizeToAlloc<Expr *, ValueDecl *, unsigned,
OMPClauseMappableExprCommon::MappableComponent>(
3 * NumVars, NumUniqueDeclarations,
NumUniqueDeclarations + NumComponentLists, NumComponents));
return new (Mem) OMPUseDevicePtrClause(NumVars, NumUniqueDeclarations,
NumComponentLists, NumComponents);
} }
OMPIsDevicePtrClause *OMPIsDevicePtrClause::Create(const ASTContext &C, OMPIsDevicePtrClause *OMPIsDevicePtrClause::Create(const ASTContext &C,

View File

@ -4981,6 +4981,9 @@ public:
/// map/privatization results in multiple arguments passed to the runtime /// map/privatization results in multiple arguments passed to the runtime
/// library. /// library.
OMP_MAP_FIRST_REF = 0x20, OMP_MAP_FIRST_REF = 0x20,
/// \brief Signal that the runtime library has to return the device pointer
/// in the current position for the data being mapped.
OMP_MAP_RETURN_PTR = 0x40,
/// \brief This flag signals that the reference being passed is a pointer to /// \brief This flag signals that the reference being passed is a pointer to
/// private data. /// private data.
OMP_MAP_PRIVATE_PTR = 0x80, OMP_MAP_PRIVATE_PTR = 0x80,
@ -4988,6 +4991,24 @@ public:
OMP_MAP_PRIVATE_VAL = 0x100, OMP_MAP_PRIVATE_VAL = 0x100,
}; };
/// Class that associates information with a base pointer to be passed to the
/// runtime library.
class BasePointerInfo {
/// The base pointer.
llvm::Value *Ptr = nullptr;
/// The base declaration that refers to this device pointer, or null if
/// there is none.
const ValueDecl *DevPtrDecl = nullptr;
public:
BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
: Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
llvm::Value *operator*() const { return Ptr; }
const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
};
typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
typedef SmallVector<unsigned, 16> MapFlagsArrayTy; typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
@ -5129,7 +5150,7 @@ private:
void generateInfoForComponentList( void generateInfoForComponentList(
OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
OMPClauseMappableExprCommon::MappableExprComponentListRef Components, OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
MapValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
bool IsFirstComponentList) const { bool IsFirstComponentList) const {
@ -5400,8 +5421,10 @@ public:
} }
/// \brief Generate all the base pointers, section pointers, sizes and map /// \brief Generate all the base pointers, section pointers, sizes and map
/// types for the extracted mappable expressions. /// types for the extracted mappable expressions. Also, for each item that
void generateAllInfo(MapValuesArrayTy &BasePointers, /// relates with a device pointer, a pair of the relevant declaration and
/// index where it occurs is appended to the device pointers info array.
void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
MapFlagsArrayTy &Types) const { MapFlagsArrayTy &Types) const {
BasePointers.clear(); BasePointers.clear();
@ -5410,9 +5433,28 @@ public:
Types.clear(); Types.clear();
struct MapInfo { struct MapInfo {
/// Kind that defines how a device pointer has to be returned.
enum ReturnPointerKind {
// Don't have to return any pointer.
RPK_None,
// Pointer is the base of the declaration.
RPK_Base,
// Pointer is a member of the base declaration - 'this'
RPK_Member,
// Pointer is a reference and a member of the base declaration - 'this'
RPK_MemberReference,
};
OMPClauseMappableExprCommon::MappableExprComponentListRef Components; OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
OpenMPMapClauseKind MapType; OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
OpenMPMapClauseKind MapTypeModifier; OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
ReturnPointerKind ReturnDevicePointer = RPK_None;
MapInfo(
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
ReturnPointerKind ReturnDevicePointer)
: Components(Components), MapType(MapType),
MapTypeModifier(MapTypeModifier),
ReturnDevicePointer(ReturnDevicePointer) {}
}; };
// We have to process the component lists that relate with the same // We have to process the component lists that relate with the same
@ -5422,14 +5464,15 @@ public:
// Helper function to fill the information map for the different supported // Helper function to fill the information map for the different supported
// clauses. // clauses.
auto &&InfoGen = auto &&InfoGen = [&Info](
[&Info](const ValueDecl *D, const ValueDecl *D,
OMPClauseMappableExprCommon::MappableExprComponentListRef L, OMPClauseMappableExprCommon::MappableExprComponentListRef L,
OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier) { OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
const ValueDecl *VD = MapInfo::ReturnPointerKind ReturnDevicePointer = MapInfo::RPK_None) {
D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; const ValueDecl *VD =
Info[VD].push_back({L, MapType, MapModifier}); D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
}; Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer});
};
for (auto *C : Directive.getClausesOfKind<OMPMapClause>()) for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
for (auto L : C->component_lists()) for (auto L : C->component_lists())
@ -5441,6 +5484,51 @@ public:
for (auto L : C->component_lists()) for (auto L : C->component_lists())
InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown); InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown);
// Look at the use_device_ptr clause information and mark the existing map
// entries as such. If there is no map information for an entry in the
// use_device_ptr list, we create one with map type 'alloc' and zero size
// section. It is the user fault if that was not mapped before.
for (auto *C : Directive.getClausesOfKind<OMPUseDevicePtrClause>())
for (auto L : C->component_lists()) {
assert(!L.second.empty() && "Not expecting empty list of components!");
const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
VD = cast<ValueDecl>(VD->getCanonicalDecl());
auto *IE = L.second.back().getAssociatedExpression();
// If the first component is a member expression, we have to look into
// 'this', which maps to null in the map of map information. Otherwise
// look directly for the information.
auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
// We potentially have map information for this declaration already.
// Look for the first set of components that refer to it.
if (It != Info.end()) {
auto CI = std::find_if(
It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
return MI.Components.back().getAssociatedDeclaration() == VD;
});
// If we found a map entry, signal that the pointer has to be returned
// and move on to the next declaration.
if (CI != It->second.end()) {
CI->ReturnDevicePointer = isa<MemberExpr>(IE)
? (VD->getType()->isReferenceType()
? MapInfo::RPK_MemberReference
: MapInfo::RPK_Member)
: MapInfo::RPK_Base;
continue;
}
}
// We didn't find any match in our map information - generate a zero
// size array section.
llvm::Value *Ptr =
CGF.EmitLoadOfLValue(CGF.EmitLValue(IE), SourceLocation())
.getScalarVal();
BasePointers.push_back({Ptr, VD});
Pointers.push_back(Ptr);
Sizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF);
}
for (auto &M : Info) { for (auto &M : Info) {
// We need to know when we generate information for the first component // We need to know when we generate information for the first component
// associated with a capture, because the mapping flags depend on it. // associated with a capture, because the mapping flags depend on it.
@ -5448,9 +5536,35 @@ public:
for (MapInfo &L : M.second) { for (MapInfo &L : M.second) {
assert(!L.Components.empty() && assert(!L.Components.empty() &&
"Not expecting declaration with no component lists."); "Not expecting declaration with no component lists.");
// Remember the current base pointer index.
unsigned CurrentBasePointersIdx = BasePointers.size();
generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components, generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components,
BasePointers, Pointers, Sizes, Types, BasePointers, Pointers, Sizes, Types,
IsFirstComponentList); IsFirstComponentList);
// If this entry relates with a device pointer, set the relevant
// declaration and add the 'return pointer' flag.
if (IsFirstComponentList &&
L.ReturnDevicePointer != MapInfo::RPK_None) {
// If the pointer is not the base of the map, we need to skip the
// base. If it is a reference in a member field, we also need to skip
// the map of the reference.
if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
++CurrentBasePointersIdx;
if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
++CurrentBasePointersIdx;
}
assert(BasePointers.size() > CurrentBasePointersIdx &&
"Unexpected number of mapped base pointers.");
auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
assert(RelevantVD &&
"No relevant declaration related with device pointer??");
BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR;
}
IsFirstComponentList = false; IsFirstComponentList = false;
} }
} }
@ -5459,7 +5573,7 @@ public:
/// \brief Generate the base pointers, section pointers, sizes and map types /// \brief Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture. /// associated to a given capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap, void generateInfoForCapture(const CapturedStmt::Capture *Cap,
MapValuesArrayTy &BasePointers, MapBaseValuesArrayTy &BasePointers,
MapValuesArrayTy &Pointers, MapValuesArrayTy &Pointers,
MapValuesArrayTy &Sizes, MapValuesArrayTy &Sizes,
MapFlagsArrayTy &Types) const { MapFlagsArrayTy &Types) const {
@ -5496,12 +5610,12 @@ public:
/// \brief Generate the default map information for a given capture \a CI, /// \brief Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV. /// record field declaration \a RI and captured value \a CV.
void generateDefaultMapInfo( void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV, const FieldDecl &RI, llvm::Value *CV,
MappableExprsHandler::MapValuesArrayTy &CurBasePointers, MapBaseValuesArrayTy &CurBasePointers,
MappableExprsHandler::MapValuesArrayTy &CurPointers, MapValuesArrayTy &CurPointers,
MappableExprsHandler::MapValuesArrayTy &CurSizes, MapValuesArrayTy &CurSizes,
MappableExprsHandler::MapFlagsArrayTy &CurMapTypes) { MapFlagsArrayTy &CurMapTypes) {
// Do the default mapping. // Do the default mapping.
if (CI.capturesThis()) { if (CI.capturesThis()) {
@ -5510,15 +5624,14 @@ public:
const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
// Default map type. // Default map type.
CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_TO | CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
MappableExprsHandler::OMP_MAP_FROM);
} else if (CI.capturesVariableByCopy()) { } else if (CI.capturesVariableByCopy()) {
CurBasePointers.push_back(CV); CurBasePointers.push_back(CV);
CurPointers.push_back(CV); CurPointers.push_back(CV);
if (!RI.getType()->isAnyPointerType()) { if (!RI.getType()->isAnyPointerType()) {
// We have to signal to the runtime captures passed by value that are // We have to signal to the runtime captures passed by value that are
// not pointers. // not pointers.
CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL); CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL);
CurSizes.push_back(CGF.getTypeSize(RI.getType())); CurSizes.push_back(CGF.getTypeSize(RI.getType()));
} else { } else {
// Pointers are implicitly mapped with a zero size and no flags // Pointers are implicitly mapped with a zero size and no flags
@ -5539,9 +5652,8 @@ public:
// default the value doesn't have to be retrieved. For an aggregate // default the value doesn't have to be retrieved. For an aggregate
// type, the default is 'tofrom'. // type, the default is 'tofrom'.
CurMapTypes.push_back(ElementType->isAggregateType() CurMapTypes.push_back(ElementType->isAggregateType()
? (MappableExprsHandler::OMP_MAP_TO | ? (OMP_MAP_TO | OMP_MAP_FROM)
MappableExprsHandler::OMP_MAP_FROM) : OMP_MAP_TO);
: MappableExprsHandler::OMP_MAP_TO);
// If we have a capture by reference we may need to add the private // If we have a capture by reference we may need to add the private
// pointer flag if the base declaration shows in some first-private // pointer flag if the base declaration shows in some first-private
@ -5551,7 +5663,7 @@ public:
} }
// Every default map produces a single argument, so, it is always the // Every default map produces a single argument, so, it is always the
// first one. // first one.
CurMapTypes.back() |= MappableExprsHandler::OMP_MAP_FIRST_REF; CurMapTypes.back() |= OMP_MAP_FIRST_REF;
} }
}; };
@ -5566,19 +5678,20 @@ enum OpenMPOffloadingReservedDeviceIDs {
/// offloading runtime library. If there is no map or capture information, /// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference. /// return nullptr by reference.
static void static void
emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray, emitOffloadingArrays(CodeGenFunction &CGF,
llvm::Value *&PointersArray, llvm::Value *&SizesArray, MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
llvm::Value *&MapTypesArray,
MappableExprsHandler::MapValuesArrayTy &BasePointers,
MappableExprsHandler::MapValuesArrayTy &Pointers, MappableExprsHandler::MapValuesArrayTy &Pointers,
MappableExprsHandler::MapValuesArrayTy &Sizes, MappableExprsHandler::MapValuesArrayTy &Sizes,
MappableExprsHandler::MapFlagsArrayTy &MapTypes) { MappableExprsHandler::MapFlagsArrayTy &MapTypes,
CGOpenMPRuntime::TargetDataInfo &Info) {
auto &CGM = CGF.CGM; auto &CGM = CGF.CGM;
auto &Ctx = CGF.getContext(); auto &Ctx = CGF.getContext();
BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr; // Reset the array information.
Info.clearArrayInfo();
Info.NumberOfPtrs = BasePointers.size();
if (unsigned PointerNumVal = BasePointers.size()) { if (Info.NumberOfPtrs) {
// Detect if we have any capture size requiring runtime evaluation of the // Detect if we have any capture size requiring runtime evaluation of the
// size so that a constant array could be eventually used. // size so that a constant array could be eventually used.
bool hasRuntimeEvaluationCaptureSize = false; bool hasRuntimeEvaluationCaptureSize = false;
@ -5588,14 +5701,14 @@ emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
break; break;
} }
llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
QualType PointerArrayType = QualType PointerArrayType =
Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
/*IndexTypeQuals=*/0); /*IndexTypeQuals=*/0);
BasePointersArray = Info.BasePointersArray =
CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
PointersArray = Info.PointersArray =
CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
// If we don't have any VLA types or other types that require runtime // If we don't have any VLA types or other types that require runtime
@ -5605,7 +5718,7 @@ emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
QualType SizeArrayType = Ctx.getConstantArrayType( QualType SizeArrayType = Ctx.getConstantArrayType(
Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
/*IndexTypeQuals=*/0); /*IndexTypeQuals=*/0);
SizesArray = Info.SizesArray =
CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
} else { } else {
// We expect all the sizes to be constant, so we collect them to create // We expect all the sizes to be constant, so we collect them to create
@ -5621,7 +5734,7 @@ emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
/*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
SizesArrayInit, ".offload_sizes"); SizesArrayInit, ".offload_sizes");
SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
SizesArray = SizesArrayGbl; Info.SizesArray = SizesArrayGbl;
} }
// The map types are always constant so we don't need to generate code to // The map types are always constant so we don't need to generate code to
@ -5633,10 +5746,10 @@ emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
/*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
MapTypesArrayInit, ".offload_maptypes"); MapTypesArrayInit, ".offload_maptypes");
MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
MapTypesArray = MapTypesArrayGbl; Info.MapTypesArray = MapTypesArrayGbl;
for (unsigned i = 0; i < PointerNumVal; ++i) { for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
llvm::Value *BPVal = BasePointers[i]; llvm::Value *BPVal = *BasePointers[i];
if (BPVal->getType()->isPointerTy()) if (BPVal->getType()->isPointerTy())
BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
else { else {
@ -5645,11 +5758,15 @@ emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
} }
llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
0, i); Info.BasePointersArray, 0, i);
Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
CGF.Builder.CreateStore(BPVal, BPAddr); CGF.Builder.CreateStore(BPVal, BPAddr);
if (Info.requiresDevicePointerInfo())
if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
llvm::Value *PVal = Pointers[i]; llvm::Value *PVal = Pointers[i];
if (PVal->getType()->isPointerTy()) if (PVal->getType()->isPointerTy())
PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
@ -5659,14 +5776,15 @@ emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
} }
llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0, llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
i); Info.PointersArray, 0, i);
Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
CGF.Builder.CreateStore(PVal, PAddr); CGF.Builder.CreateStore(PVal, PAddr);
if (hasRuntimeEvaluationCaptureSize) { if (hasRuntimeEvaluationCaptureSize) {
llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
Info.SizesArray,
/*Idx0=*/0, /*Idx0=*/0,
/*Idx1=*/i); /*Idx1=*/i);
Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
@ -5682,23 +5800,24 @@ emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
static void emitOffloadingArraysArgument( static void emitOffloadingArraysArgument(
CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray, llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
llvm::Value *PointersArray, llvm::Value *SizesArray,
llvm::Value *MapTypesArray, unsigned NumElems) {
auto &CGM = CGF.CGM; auto &CGM = CGF.CGM;
if (NumElems) { if (Info.NumberOfPtrs) {
BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray, llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
Info.BasePointersArray,
/*Idx0=*/0, /*Idx1=*/0); /*Idx0=*/0, /*Idx1=*/0);
PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray, llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
Info.PointersArray,
/*Idx0=*/0, /*Idx0=*/0,
/*Idx1=*/0); /*Idx1=*/0);
SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray, llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
/*Idx0=*/0, /*Idx1=*/0); /*Idx0=*/0, /*Idx1=*/0);
MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray, llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs),
Info.MapTypesArray,
/*Idx0=*/0, /*Idx0=*/0,
/*Idx1=*/0); /*Idx1=*/0);
} else { } else {
@ -5725,12 +5844,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// Fill up the arrays with all the captured variables. // Fill up the arrays with all the captured variables.
MappableExprsHandler::MapValuesArrayTy KernelArgs; MappableExprsHandler::MapValuesArrayTy KernelArgs;
MappableExprsHandler::MapValuesArrayTy BasePointers; MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Pointers;
MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapValuesArrayTy Sizes;
MappableExprsHandler::MapFlagsArrayTy MapTypes; MappableExprsHandler::MapFlagsArrayTy MapTypes;
MappableExprsHandler::MapValuesArrayTy CurBasePointers; MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
MappableExprsHandler::MapValuesArrayTy CurPointers; MappableExprsHandler::MapValuesArrayTy CurPointers;
MappableExprsHandler::MapValuesArrayTy CurSizes; MappableExprsHandler::MapValuesArrayTy CurSizes;
MappableExprsHandler::MapFlagsArrayTy CurMapTypes; MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
@ -5779,7 +5898,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// The kernel args are always the first elements of the base pointers // The kernel args are always the first elements of the base pointers
// associated with a capture. // associated with a capture.
KernelArgs.push_back(CurBasePointers.front()); KernelArgs.push_back(*CurBasePointers.front());
// We need to append the results of this capture to what we already have. // We need to append the results of this capture to what we already have.
BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
Pointers.append(CurPointers.begin(), CurPointers.end()); Pointers.append(CurPointers.begin(), CurPointers.end());
@ -5802,17 +5921,11 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
&D](CodeGenFunction &CGF, PrePostActionTy &) { &D](CodeGenFunction &CGF, PrePostActionTy &) {
auto &RT = CGF.CGM.getOpenMPRuntime(); auto &RT = CGF.CGM.getOpenMPRuntime();
// Emit the offloading arrays. // Emit the offloading arrays.
llvm::Value *BasePointersArray; TargetDataInfo Info;
llvm::Value *PointersArray; emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
llvm::Value *SizesArray; emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
llvm::Value *MapTypesArray; Info.PointersArray, Info.SizesArray,
emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray, Info.MapTypesArray, Info);
MapTypesArray, BasePointers, Pointers, Sizes,
MapTypes);
emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray,
SizesArray, MapTypesArray, BasePointersArray,
PointersArray, SizesArray, MapTypesArray,
BasePointers.size());
// On top of the arrays that were filled up, the target offloading call // On top of the arrays that were filled up, the target offloading call
// takes as arguments the device id as well as the host pointer. The host // takes as arguments the device id as well as the host pointer. The host
@ -5853,15 +5966,19 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
assert(ThreadLimit && "Thread limit expression should be available along " assert(ThreadLimit && "Thread limit expression should be available along "
"with number of teams."); "with number of teams.");
llvm::Value *OffloadingArgs[] = { llvm::Value *OffloadingArgs[] = {
DeviceID, OutlinedFnID, PointerNum, DeviceID, OutlinedFnID,
BasePointersArray, PointersArray, SizesArray, PointerNum, Info.BasePointersArray,
MapTypesArray, NumTeams, ThreadLimit}; Info.PointersArray, Info.SizesArray,
Info.MapTypesArray, NumTeams,
ThreadLimit};
Return = CGF.EmitRuntimeCall( Return = CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
} else { } else {
llvm::Value *OffloadingArgs[] = { llvm::Value *OffloadingArgs[] = {
DeviceID, OutlinedFnID, PointerNum, BasePointersArray, DeviceID, OutlinedFnID,
PointersArray, SizesArray, MapTypesArray}; PointerNum, Info.BasePointersArray,
Info.PointersArray, Info.SizesArray,
Info.MapTypesArray};
Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
OffloadingArgs); OffloadingArgs);
} }
@ -6073,29 +6190,23 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
PushNumTeamsArgs); PushNumTeamsArgs);
} }
void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF, void CGOpenMPRuntime::emitTargetDataCalls(
const OMPExecutableDirective &D, CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
const Expr *Device,
const RegionCodeGenTy &CodeGen) {
if (!CGF.HaveInsertPoint()) if (!CGF.HaveInsertPoint())
return; return;
llvm::Value *BasePointersArray = nullptr; // Action used to replace the default codegen action and turn privatization
llvm::Value *PointersArray = nullptr; // off.
llvm::Value *SizesArray = nullptr; PrePostActionTy NoPrivAction;
llvm::Value *MapTypesArray = nullptr;
unsigned NumOfPtrs = 0;
// Generate the code for the opening of the data environment. Capture all the // Generate the code for the opening of the data environment. Capture all the
// arguments of the runtime call by reference because they are used in the // arguments of the runtime call by reference because they are used in the
// closing of the region. // closing of the region.
auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray, auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction](
&SizesArray, &MapTypesArray, Device, CodeGenFunction &CGF, PrePostActionTy &) {
&NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
// Fill up the arrays with all the mapped variables. // Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapValuesArrayTy BasePointers; MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Pointers;
MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapValuesArrayTy Sizes;
MappableExprsHandler::MapFlagsArrayTy MapTypes; MappableExprsHandler::MapFlagsArrayTy MapTypes;
@ -6103,21 +6214,16 @@ void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF,
// Get map clause information. // Get map clause information.
MappableExprsHandler MCHandler(D, CGF); MappableExprsHandler MCHandler(D, CGF);
MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
NumOfPtrs = BasePointers.size();
// Fill up the arrays and create the arguments. // Fill up the arrays and create the arguments.
emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray, emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
MapTypesArray, BasePointers, Pointers, Sizes,
MapTypes);
llvm::Value *BasePointersArrayArg = nullptr; llvm::Value *BasePointersArrayArg = nullptr;
llvm::Value *PointersArrayArg = nullptr; llvm::Value *PointersArrayArg = nullptr;
llvm::Value *SizesArrayArg = nullptr; llvm::Value *SizesArrayArg = nullptr;
llvm::Value *MapTypesArrayArg = nullptr; llvm::Value *MapTypesArrayArg = nullptr;
emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
SizesArrayArg, MapTypesArrayArg, SizesArrayArg, MapTypesArrayArg, Info);
BasePointersArray, PointersArray, SizesArray,
MapTypesArray, NumOfPtrs);
// Emit device ID if any. // Emit device ID if any.
llvm::Value *DeviceID = nullptr; llvm::Value *DeviceID = nullptr;
@ -6128,7 +6234,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF,
DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
// Emit the number of elements in the offloading arrays. // Emit the number of elements in the offloading arrays.
auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs); auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
llvm::Value *OffloadingArgs[] = { llvm::Value *OffloadingArgs[] = {
DeviceID, PointerNum, BasePointersArrayArg, DeviceID, PointerNum, BasePointersArrayArg,
@ -6136,23 +6242,24 @@ void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF,
auto &RT = CGF.CGM.getOpenMPRuntime(); auto &RT = CGF.CGM.getOpenMPRuntime();
CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
OffloadingArgs); OffloadingArgs);
// If device pointer privatization is required, emit the body of the region
// here. It will have to be duplicated: with and without privatization.
if (!Info.CaptureDeviceAddrMap.empty())
CodeGen(CGF);
}; };
// Generate code for the closing of the data region. // Generate code for the closing of the data region.
auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray, auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF,
&MapTypesArray, Device, PrePostActionTy &) {
&NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) { assert(Info.isValid() && "Invalid data environment closing arguments.");
assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray &&
NumOfPtrs && "Invalid data environment closing arguments.");
llvm::Value *BasePointersArrayArg = nullptr; llvm::Value *BasePointersArrayArg = nullptr;
llvm::Value *PointersArrayArg = nullptr; llvm::Value *PointersArrayArg = nullptr;
llvm::Value *SizesArrayArg = nullptr; llvm::Value *SizesArrayArg = nullptr;
llvm::Value *MapTypesArrayArg = nullptr; llvm::Value *MapTypesArrayArg = nullptr;
emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
SizesArrayArg, MapTypesArrayArg, SizesArrayArg, MapTypesArrayArg, Info);
BasePointersArray, PointersArray, SizesArray,
MapTypesArray, NumOfPtrs);
// Emit device ID if any. // Emit device ID if any.
llvm::Value *DeviceID = nullptr; llvm::Value *DeviceID = nullptr;
@ -6163,7 +6270,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF,
DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
// Emit the number of elements in the offloading arrays. // Emit the number of elements in the offloading arrays.
auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs); auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
llvm::Value *OffloadingArgs[] = { llvm::Value *OffloadingArgs[] = {
DeviceID, PointerNum, BasePointersArrayArg, DeviceID, PointerNum, BasePointersArrayArg,
@ -6173,24 +6280,40 @@ void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF,
OffloadingArgs); OffloadingArgs);
}; };
// In the event we get an if clause, we don't have to take any action on the // If we need device pointer privatization, we need to emit the body of the
// else side. // region with no privatization in the 'else' branch of the conditional.
auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; // Otherwise, we don't have to do anything.
auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
PrePostActionTy &) {
if (!Info.CaptureDeviceAddrMap.empty()) {
CodeGen.setAction(NoPrivAction);
CodeGen(CGF);
}
};
// We don't have to do anything to close the region if the if clause evaluates
// to false.
auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
if (IfCond) { if (IfCond) {
emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen); emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
} else { } else {
RegionCodeGenTy BeginThenRCG(BeginThenGen); RegionCodeGenTy RCG(BeginThenGen);
BeginThenRCG(CGF); RCG(CGF);
} }
CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen); // If we don't require privatization of device pointers, we emit the body in
// between the runtime calls. This avoids duplicating the body code.
if (Info.CaptureDeviceAddrMap.empty()) {
CodeGen.setAction(NoPrivAction);
CodeGen(CGF);
}
if (IfCond) { if (IfCond) {
emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen); emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
} else { } else {
RegionCodeGenTy EndThenRCG(EndThenGen); RegionCodeGenTy RCG(EndThenGen);
EndThenRCG(CGF); RCG(CGF);
} }
} }
@ -6208,7 +6331,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
// Generate the code for the opening of the data environment. // Generate the code for the opening of the data environment.
auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) { auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
// Fill up the arrays with all the mapped variables. // Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapValuesArrayTy BasePointers; MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Pointers;
MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapValuesArrayTy Sizes;
MappableExprsHandler::MapFlagsArrayTy MapTypes; MappableExprsHandler::MapFlagsArrayTy MapTypes;
@ -6217,19 +6340,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
MappableExprsHandler MEHandler(D, CGF); MappableExprsHandler MEHandler(D, CGF);
MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
llvm::Value *BasePointersArrayArg = nullptr;
llvm::Value *PointersArrayArg = nullptr;
llvm::Value *SizesArrayArg = nullptr;
llvm::Value *MapTypesArrayArg = nullptr;
// Fill up the arrays and create the arguments. // Fill up the arrays and create the arguments.
emitOffloadingArrays(CGF, BasePointersArrayArg, PointersArrayArg, TargetDataInfo Info;
SizesArrayArg, MapTypesArrayArg, BasePointers, emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
Pointers, Sizes, MapTypes); emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
emitOffloadingArraysArgument( Info.PointersArray, Info.SizesArray,
CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, Info.MapTypesArray, Info);
MapTypesArrayArg, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
MapTypesArrayArg, BasePointers.size());
// Emit device ID if any. // Emit device ID if any.
llvm::Value *DeviceID = nullptr; llvm::Value *DeviceID = nullptr;
@ -6243,8 +6359,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
llvm::Value *OffloadingArgs[] = { llvm::Value *OffloadingArgs[] = {
DeviceID, PointerNum, BasePointersArrayArg, DeviceID, PointerNum, Info.BasePointersArray,
PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; Info.PointersArray, Info.SizesArray, Info.MapTypesArray};
auto &RT = CGF.CGM.getOpenMPRuntime(); auto &RT = CGF.CGM.getOpenMPRuntime();
// Select the right runtime function call for each expected standalone // Select the right runtime function call for each expected standalone

View File

@ -997,17 +997,59 @@ public:
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
const Expr *ThreadLimit, SourceLocation Loc); const Expr *ThreadLimit, SourceLocation Loc);
/// Struct that keeps all the relevant information that should be kept
/// throughout a 'target data' region.
class TargetDataInfo {
/// Set to true if device pointer information have to be obtained.
bool RequiresDevicePointerInfo = false;
public:
/// The array of base pointer passed to the runtime library.
llvm::Value *BasePointersArray = nullptr;
/// The array of section pointers passed to the runtime library.
llvm::Value *PointersArray = nullptr;
/// The array of sizes passed to the runtime library.
llvm::Value *SizesArray = nullptr;
/// The array of map types passed to the runtime library.
llvm::Value *MapTypesArray = nullptr;
/// The total number of pointers passed to the runtime library.
unsigned NumberOfPtrs = 0u;
/// Map between the a declaration of a capture and the corresponding base
/// pointer address where the runtime returns the device pointers.
llvm::DenseMap<const ValueDecl *, Address> CaptureDeviceAddrMap;
explicit TargetDataInfo() {}
explicit TargetDataInfo(bool RequiresDevicePointerInfo)
: RequiresDevicePointerInfo(RequiresDevicePointerInfo) {}
/// Clear information about the data arrays.
void clearArrayInfo() {
BasePointersArray = nullptr;
PointersArray = nullptr;
SizesArray = nullptr;
MapTypesArray = nullptr;
NumberOfPtrs = 0u;
}
/// Return true if the current target data information has valid arrays.
bool isValid() {
return BasePointersArray && PointersArray && SizesArray &&
MapTypesArray && NumberOfPtrs;
}
bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
};
/// \brief Emit the target data mapping code associated with \a D. /// \brief Emit the target data mapping code associated with \a D.
/// \param D Directive to emit. /// \param D Directive to emit.
/// \param IfCond Expression evaluated in if clause associated with the target /// \param IfCond Expression evaluated in if clause associated with the
/// directive, or null if no if clause is used. /// target directive, or null if no device clause is used.
/// \param Device Expression evaluated in device clause associated with the /// \param Device Expression evaluated in device clause associated with the
/// target directive, or null if no device clause is used. /// target directive, or null if no device clause is used.
/// \param CodeGen Function that emits the enclosed region. /// \param Info A record used to store information that needs to be preserved
/// until the region is closed.
virtual void emitTargetDataCalls(CodeGenFunction &CGF, virtual void emitTargetDataCalls(CodeGenFunction &CGF,
const OMPExecutableDirective &D, const OMPExecutableDirective &D,
const Expr *IfCond, const Expr *Device, const Expr *IfCond, const Expr *Device,
const RegionCodeGenTy &CodeGen); const RegionCodeGenTy &CodeGen,
TargetDataInfo &Info);
/// \brief Emit the data mapping/movement code associated with the directive /// \brief Emit the data mapping/movement code associated with the directive
/// \a D that should be of the form 'target [{enter|exit} data | update]'. /// \a D that should be of the form 'target [{enter|exit} data | update]'.

View File

@ -3400,22 +3400,137 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
return BreakContinueStack.back().BreakBlock; return BreakContinueStack.back().BreakBlock;
} }
void CodeGenFunction::EmitOMPUseDevicePtrClause(
const OMPClause &NC, OMPPrivateScope &PrivateScope,
const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
const auto &C = cast<OMPUseDevicePtrClause>(NC);
auto OrigVarIt = C.varlist_begin();
auto InitIt = C.inits().begin();
for (auto PvtVarIt : C.private_copies()) {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
// In order to identify the right initializer we need to match the
// declaration used by the mapping logic. In some cases we may get
// OMPCapturedExprDecl that refers to the original declaration.
const ValueDecl *MatchingVD = OrigVD;
if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
// OMPCapturedExprDecl are used to privative fields of the current
// structure.
auto *ME = cast<MemberExpr>(OED->getInit());
assert(isa<CXXThisExpr>(ME->getBase()) &&
"Base should be the current struct!");
MatchingVD = ME->getMemberDecl();
}
// If we don't have information about the current list item, move on to
// the next one.
auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
if (InitAddrIt == CaptureDeviceAddrMap.end())
continue;
bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
// Initialize the temporary initialization variable with the address we
// get from the runtime library. We have to cast the source address
// because it is always a void *. References are materialized in the
// privatization scope, so the initialization here disregards the fact
// the original variable is a reference.
QualType AddrQTy =
getContext().getPointerType(OrigVD->getType().getNonReferenceType());
llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
setAddrOfLocalVar(InitVD, InitAddr);
// Emit private declaration, it will be initialized by the value we
// declaration we just added to the local declarations map.
EmitDecl(*PvtVD);
// The initialization variables reached its purpose in the emission
// ofthe previous declaration, so we don't need it anymore.
LocalDeclMap.erase(InitVD);
// Return the address of the private variable.
return GetAddrOfLocalVar(PvtVD);
});
assert(IsRegistered && "firstprivate var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
++OrigVarIt;
++InitIt;
}
}
// Generate the instructions for '#pragma omp target data' directive. // Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective( void CodeGenFunction::EmitOMPTargetDataDirective(
const OMPTargetDataDirective &S) { const OMPTargetDataDirective &S) {
// The target data enclosed region is implemented just by emitting the CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
// statement.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { // Create a pre/post action to signal the privatization of the device pointer.
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); // This action can be replaced by the OpenMP runtime code generation to
// deactivate privatization.
bool PrivatizeDevicePointers = false;
class DevicePointerPrivActionTy : public PrePostActionTy {
bool &PrivatizeDevicePointers;
public:
explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
: PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
void Enter(CodeGenFunction &CGF) override {
PrivatizeDevicePointers = true;
}
}; };
DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
CodeGenFunction &CGF, PrePostActionTy &Action) {
auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitStmt(
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
// Codegen that selects wheather to generate the privatization code or not.
auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
&InnermostCodeGen](CodeGenFunction &CGF,
PrePostActionTy &Action) {
RegionCodeGenTy RCG(InnermostCodeGen);
PrivatizeDevicePointers = false;
// Call the pre-action to change the status of PrivatizeDevicePointers if
// needed.
Action.Enter(CGF);
if (PrivatizeDevicePointers) {
OMPPrivateScope PrivateScope(CGF);
// Emit all instances of the use_device_ptr clause.
for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
Info.CaptureDeviceAddrMap);
(void)PrivateScope.Privatize();
RCG(CGF);
} else
RCG(CGF);
};
// Forward the provided action to the privatization codegen.
RegionCodeGenTy PrivRCG(PrivCodeGen);
PrivRCG.setAction(Action);
// Notwithstanding the body of the region is emitted as inlined directive,
// we don't use an inline scope as changes in the references inside the
// region are expected to be visible outside, so we do not privative them.
OMPLexicalScope Scope(CGF, S);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
PrivRCG);
};
RegionCodeGenTy RCG(CodeGen);
// If we don't have target devices, don't bother emitting the data mapping // If we don't have target devices, don't bother emitting the data mapping
// code. // code.
if (CGM.getLangOpts().OMPTargetTriples.empty()) { if (CGM.getLangOpts().OMPTargetTriples.empty()) {
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); RCG(*this);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_data,
CodeGen);
return; return;
} }
@ -3429,7 +3544,12 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
if (auto *C = S.getSingleClause<OMPDeviceClause>()) if (auto *C = S.getSingleClause<OMPDeviceClause>())
Device = C->getDevice(); Device = C->getDevice();
CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, CodeGen); // Set the action to signal privatization of device pointers.
RCG.setAction(PrivAction);
// Emit region code.
CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
Info);
} }
void CodeGenFunction::EmitOMPTargetEnterDataDirective( void CodeGenFunction::EmitOMPTargetEnterDataDirective(

View File

@ -2392,6 +2392,9 @@ public:
OMPPrivateScope &PrivateScope); OMPPrivateScope &PrivateScope);
void EmitOMPPrivateClause(const OMPExecutableDirective &D, void EmitOMPPrivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope); OMPPrivateScope &PrivateScope);
void EmitOMPUseDevicePtrClause(
const OMPClause &C, OMPPrivateScope &PrivateScope,
const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap);
/// \brief Emit code for copyin clause in \a D directive. The next code is /// \brief Emit code for copyin clause in \a D directive. The next code is
/// generated at the start of outlined functions for directives: /// generated at the start of outlined functions for directives:
/// \code /// \code

View File

@ -11800,7 +11800,10 @@ OMPClause *Sema::ActOnOpenMPUseDevicePtrClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation LParenLoc,
SourceLocation EndLoc) { SourceLocation EndLoc) {
SmallVector<Expr *, 8> Vars; MappableVarListInfo MVLI(VarList);
SmallVector<Expr *, 8> PrivateCopies;
SmallVector<Expr *, 8> Inits;
for (auto &RefExpr : VarList) { for (auto &RefExpr : VarList) {
assert(RefExpr && "NULL expr in OpenMP use_device_ptr clause."); assert(RefExpr && "NULL expr in OpenMP use_device_ptr clause.");
SourceLocation ELoc; SourceLocation ELoc;
@ -11809,27 +11812,73 @@ OMPClause *Sema::ActOnOpenMPUseDevicePtrClause(ArrayRef<Expr *> VarList,
auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange);
if (Res.second) { if (Res.second) {
// It will be analyzed later. // It will be analyzed later.
Vars.push_back(RefExpr); MVLI.ProcessedVarList.push_back(RefExpr);
PrivateCopies.push_back(nullptr);
Inits.push_back(nullptr);
} }
ValueDecl *D = Res.first; ValueDecl *D = Res.first;
if (!D) if (!D)
continue; continue;
QualType Type = D->getType(); QualType Type = D->getType();
// item should be a pointer or reference to pointer Type = Type.getNonReferenceType().getUnqualifiedType();
if (!Type.getNonReferenceType()->isPointerType()) {
auto *VD = dyn_cast<VarDecl>(D);
// Item should be a pointer or reference to pointer.
if (!Type->isPointerType()) {
Diag(ELoc, diag::err_omp_usedeviceptr_not_a_pointer) Diag(ELoc, diag::err_omp_usedeviceptr_not_a_pointer)
<< 0 << RefExpr->getSourceRange(); << 0 << RefExpr->getSourceRange();
continue; continue;
} }
Vars.push_back(RefExpr->IgnoreParens());
// Build the private variable and the expression that refers to it.
auto VDPrivate = buildVarDecl(*this, ELoc, Type, D->getName(),
D->hasAttrs() ? &D->getAttrs() : nullptr);
if (VDPrivate->isInvalidDecl())
continue;
CurContext->addDecl(VDPrivate);
auto VDPrivateRefExpr = buildDeclRefExpr(
*this, VDPrivate, RefExpr->getType().getUnqualifiedType(), ELoc);
// Add temporary variable to initialize the private copy of the pointer.
auto *VDInit =
buildVarDecl(*this, RefExpr->getExprLoc(), Type, ".devptr.temp");
auto *VDInitRefExpr = buildDeclRefExpr(*this, VDInit, RefExpr->getType(),
RefExpr->getExprLoc());
AddInitializerToDecl(VDPrivate,
DefaultLvalueConversion(VDInitRefExpr).get(),
/*DirectInit=*/false, /*TypeMayContainAuto=*/false);
// If required, build a capture to implement the privatization initialized
// with the current list item value.
DeclRefExpr *Ref = nullptr;
if (!VD)
Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true);
MVLI.ProcessedVarList.push_back(VD ? RefExpr->IgnoreParens() : Ref);
PrivateCopies.push_back(VDPrivateRefExpr);
Inits.push_back(VDInitRefExpr);
// We need to add a data sharing attribute for this variable to make sure it
// is correctly captured. A variable that shows up in a use_device_ptr has
// similar properties of a first private variable.
DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_firstprivate, Ref);
// Create a mappable component for the list item. List items in this clause
// only need a component.
MVLI.VarBaseDeclarations.push_back(D);
MVLI.VarComponents.resize(MVLI.VarComponents.size() + 1);
MVLI.VarComponents.back().push_back(
OMPClauseMappableExprCommon::MappableComponent(SimpleRefExpr, D));
} }
if (Vars.empty()) if (MVLI.ProcessedVarList.empty())
return nullptr; return nullptr;
return OMPUseDevicePtrClause::Create(Context, StartLoc, LParenLoc, EndLoc, return OMPUseDevicePtrClause::Create(
Vars); Context, StartLoc, LParenLoc, EndLoc, MVLI.ProcessedVarList,
PrivateCopies, Inits, MVLI.VarBaseDeclarations, MVLI.VarComponents);
} }
OMPClause *Sema::ActOnOpenMPIsDevicePtrClause(ArrayRef<Expr *> VarList, OMPClause *Sema::ActOnOpenMPIsDevicePtrClause(ArrayRef<Expr *> VarList,

View File

@ -1932,9 +1932,15 @@ OMPClause *OMPClauseReader::readClause() {
NumComponents); NumComponents);
break; break;
} }
case OMPC_use_device_ptr: case OMPC_use_device_ptr: {
C = OMPUseDevicePtrClause::CreateEmpty(Context, Record[Idx++]); unsigned NumVars = Record[Idx++];
unsigned NumDeclarations = Record[Idx++];
unsigned NumLists = Record[Idx++];
unsigned NumComponents = Record[Idx++];
C = OMPUseDevicePtrClause::CreateEmpty(Context, NumVars, NumDeclarations,
NumLists, NumComponents);
break; break;
}
case OMPC_is_device_ptr: case OMPC_is_device_ptr:
C = OMPIsDevicePtrClause::CreateEmpty(Context, Record[Idx++]); C = OMPIsDevicePtrClause::CreateEmpty(Context, Record[Idx++]);
break; break;
@ -2457,13 +2463,54 @@ void OMPClauseReader::VisitOMPFromClause(OMPFromClause *C) {
void OMPClauseReader::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) { void OMPClauseReader::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) {
C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx)); C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
unsigned NumVars = C->varlist_size(); auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars; SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars); Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i) for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Reader->Reader.ReadSubExpr()); Vars.push_back(Reader->Reader.ReadSubExpr());
C->setVarRefs(Vars); C->setVarRefs(Vars);
Vars.clear(); Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Reader->Reader.ReadSubExpr());
C->setPrivateCopies(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Reader->Reader.ReadSubExpr());
C->setInits(Vars);
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(
Reader->Reader.ReadDeclAs<ValueDecl>(Reader->F, Record, Idx));
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record[Idx++]);
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record[Idx++]);
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
Expr *AssociatedExpr = Reader->Reader.ReadSubExpr();
ValueDecl *AssociatedDecl =
Reader->Reader.ReadDeclAs<ValueDecl>(Reader->F, Record, Idx);
Components.push_back(OMPClauseMappableExprCommon::MappableComponent(
AssociatedExpr, AssociatedDecl));
}
C->setComponents(Components, ListSizes);
} }
void OMPClauseReader::VisitOMPIsDevicePtrClause(OMPIsDevicePtrClause *C) { void OMPClauseReader::VisitOMPIsDevicePtrClause(OMPIsDevicePtrClause *C) {

View File

@ -2151,9 +2151,25 @@ void OMPClauseWriter::VisitOMPFromClause(OMPFromClause *C) {
void OMPClauseWriter::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) { void OMPClauseWriter::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) {
Record.push_back(C->varlist_size()); Record.push_back(C->varlist_size());
Record.push_back(C->getUniqueDeclarationsNum());
Record.push_back(C->getTotalComponentListNum());
Record.push_back(C->getTotalComponentsNum());
Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getLParenLoc());
for (auto *VE : C->varlists()) { for (auto *E : C->varlists())
Record.AddStmt(E);
for (auto *VE : C->private_copies())
Record.AddStmt(VE); Record.AddStmt(VE);
for (auto *VE : C->inits())
Record.AddStmt(VE);
for (auto *D : C->all_decls())
Record.AddDeclRef(D);
for (auto N : C->all_num_lists())
Record.push_back(N);
for (auto N : C->all_lists_sizes())
Record.push_back(N);
for (auto &M : C->all_components()) {
Record.AddStmt(M.getAssociatedExpression());
Record.AddDeclRef(M.getAssociatedDeclaration());
} }
} }

View File

@ -0,0 +1,464 @@
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
///==========================================================================///
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
#ifdef CK1
double *g;
// CK1: @g = global double*
// CK1: [[MTYPE00:@.+]] = {{.*}}constant [1 x i32] [i32 99]
// CK1: [[MTYPE01:@.+]] = {{.*}}constant [1 x i32] [i32 99]
// CK1: [[MTYPE03:@.+]] = {{.*}}constant [1 x i32] [i32 99]
// CK1: [[MTYPE04:@.+]] = {{.*}}constant [1 x i32] [i32 99]
// CK1: [[MTYPE05:@.+]] = {{.*}}constant [1 x i32] [i32 99]
// CK1: [[MTYPE06:@.+]] = {{.*}}constant [1 x i32] [i32 99]
// CK1: [[MTYPE07:@.+]] = {{.*}}constant [1 x i32] [i32 99]
// CK1: [[MTYPE08:@.+]] = {{.*}}constant [2 x i32] [{{i32 35, i32 99|i32 99, i32 35}}]
// CK1: [[MTYPE09:@.+]] = {{.*}}constant [2 x i32] [i32 99, i32 99]
// CK1: [[MTYPE10:@.+]] = {{.*}}constant [2 x i32] [i32 99, i32 99]
// CK1: [[MTYPE11:@.+]] = {{.*}}constant [2 x i32] [i32 96, i32 35]
// CK1: [[MTYPE12:@.+]] = {{.*}}constant [2 x i32] [i32 96, i32 35]
// CK1-LABEL: @_Z3foo
template<typename T>
void foo(float *&lr, T *&tr) {
float *l;
T *t;
// CK1-DAG: [[RVAL:%.+]] = bitcast double* [[T:%.+]] to i8*
// CK1-DAG: [[T]] = load double*, double** [[DECL:@g]],
// CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
// CK1: store i8* [[RVAL]], i8** [[BP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE00]]
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to double**
// CK1: [[VAL:%.+]] = load double*, double** [[CBP]],
// CK1-NOT: store double* [[VAL]], double** [[DECL]],
// CK1: store double* [[VAL]], double** [[PVT:%.+]],
// CK1: [[TT:%.+]] = load double*, double** [[PVT]],
// CK1: getelementptr inbounds double, double* [[TT]], i32 1
#pragma omp target data map(g[:10]) use_device_ptr(g)
{
++g;
}
// CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE00]]
// CK1: [[TTT:%.+]] = load double*, double** [[DECL]],
// CK1: getelementptr inbounds double, double* [[TTT]], i32 1
++g;
// CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8*
// CK1-DAG: [[T1]] = load float*, float** [[DECL:%.+]],
// CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
// CK1: store i8* [[RVAL]], i8** [[BP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE01]]
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float**
// CK1: [[VAL:%.+]] = load float*, float** [[CBP]],
// CK1-NOT: store float* [[VAL]], float** [[DECL]],
// CK1: store float* [[VAL]], float** [[PVT:%.+]],
// CK1: [[TT1:%.+]] = load float*, float** [[PVT]],
// CK1: getelementptr inbounds float, float* [[TT1]], i32 1
#pragma omp target data map(l[:10]) use_device_ptr(l)
{
++l;
}
// CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE01]]
// CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
// CK1: getelementptr inbounds float, float* [[TTT]], i32 1
++l;
// CK1-NOT: call void @__tgt_target
// CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
// CK1: getelementptr inbounds float, float* [[TTT]], i32 1
#pragma omp target data map(l[:10]) use_device_ptr(l) if(0)
{
++l;
}
// CK1-NOT: call void @__tgt_target
// CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
// CK1: getelementptr inbounds float, float* [[TTT]], i32 1
++l;
// CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8*
// CK1-DAG: [[T1]] = load float*, float** [[DECL:%.+]],
// CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
// CK1: store i8* [[RVAL]], i8** [[BP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE03]]
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float**
// CK1: [[VAL:%.+]] = load float*, float** [[CBP]],
// CK1-NOT: store float* [[VAL]], float** [[DECL]],
// CK1: store float* [[VAL]], float** [[PVT:%.+]],
// CK1: [[TT1:%.+]] = load float*, float** [[PVT]],
// CK1: getelementptr inbounds float, float* [[TT1]], i32 1
#pragma omp target data map(l[:10]) use_device_ptr(l) if(1)
{
++l;
}
// CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE03]]
// CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
// CK1: getelementptr inbounds float, float* [[TTT]], i32 1
++l;
// CK1: [[CMP:%.+]] = icmp ne float* %{{.+}}, null
// CK1: br i1 [[CMP]], label %[[BTHEN:.+]], label %[[BELSE:.+]]
// CK1: [[BTHEN]]:
// CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8*
// CK1-DAG: [[T1]] = load float*, float** [[DECL:%.+]],
// CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
// CK1: store i8* [[RVAL]], i8** [[BP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE04]]
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float**
// CK1: [[VAL:%.+]] = load float*, float** [[CBP]],
// CK1-NOT: store float* [[VAL]], float** [[DECL]],
// CK1: store float* [[VAL]], float** [[PVT:%.+]],
// CK1: [[TT1:%.+]] = load float*, float** [[PVT]],
// CK1: getelementptr inbounds float, float* [[TT1]], i32 1
// CK1: br label %[[BEND:.+]]
// CK1: [[BELSE]]:
// CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
// CK1: getelementptr inbounds float, float* [[TTT]], i32 1
// CK1: br label %[[BEND]]
#pragma omp target data map(l[:10]) use_device_ptr(l) if(lr != 0)
{
++l;
}
// CK1: [[BEND]]:
// CK1: [[CMP:%.+]] = icmp ne float* %{{.+}}, null
// CK1: br i1 [[CMP]], label %[[BTHEN:.+]], label %[[BELSE:.+]]
// CK1: [[BTHEN]]:
// CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE04]]
// CK1: br label %[[BEND:.+]]
// CK1: [[BELSE]]:
// CK1: br label %[[BEND]]
// CK1: [[BEND]]:
// CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
// CK1: getelementptr inbounds float, float* [[TTT]], i32 1
++l;
// CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8*
// CK1-DAG: [[T1]] = load float*, float** [[T2:%.+]],
// CK1-DAG: [[T2]] = load float**, float*** [[DECL:%.+]],
// CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
// CK1: store i8* [[RVAL]], i8** [[BP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE05]]
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float**
// CK1: [[VAL:%.+]] = load float*, float** [[CBP]],
// CK1: store float* [[VAL]], float** [[PVTV:%.+]],
// CK1-NOT: store float** [[PVTV]], float*** [[DECL]],
// CK1: store float** [[PVTV]], float*** [[PVT:%.+]],
// CK1: [[TT1:%.+]] = load float**, float*** [[PVT]],
// CK1: [[TT2:%.+]] = load float*, float** [[TT1]],
// CK1: getelementptr inbounds float, float* [[TT2]], i32 1
#pragma omp target data map(lr[:10]) use_device_ptr(lr)
{
++lr;
}
// CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE05]]
// CK1: [[TTT:%.+]] = load float**, float*** [[DECL]],
// CK1: [[TTTT:%.+]] = load float*, float** [[TTT]],
// CK1: getelementptr inbounds float, float* [[TTTT]], i32 1
++lr;
// CK1-DAG: [[RVAL:%.+]] = bitcast i32* [[T1:%.+]] to i8*
// CK1-DAG: [[T1]] = load i32*, i32** [[DECL:%.+]],
// CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
// CK1: store i8* [[RVAL]], i8** [[BP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE06]]
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32**
// CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]],
// CK1-NOT: store i32* [[VAL]], i32** [[DECL]],
// CK1: store i32* [[VAL]], i32** [[PVT:%.+]],
// CK1: [[TT1:%.+]] = load i32*, i32** [[PVT]],
// CK1: getelementptr inbounds i32, i32* [[TT1]], i32 1
#pragma omp target data map(t[:10]) use_device_ptr(t)
{
++t;
}
// CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE06]]
// CK1: [[TTT:%.+]] = load i32*, i32** [[DECL]],
// CK1: getelementptr inbounds i32, i32* [[TTT]], i32 1
++t;
// CK1-DAG: [[RVAL:%.+]] = bitcast i32* [[T1:%.+]] to i8*
// CK1-DAG: [[T1]] = load i32*, i32** [[T2:%.+]],
// CK1-DAG: [[T2]] = load i32**, i32*** [[DECL:%.+]],
// CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
// CK1: store i8* [[RVAL]], i8** [[BP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE07]]
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32**
// CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]],
// CK1: store i32* [[VAL]], i32** [[PVTV:%.+]],
// CK1-NOT: store i32** [[PVTV]], i32*** [[DECL]],
// CK1: store i32** [[PVTV]], i32*** [[PVT:%.+]],
// CK1: [[TT1:%.+]] = load i32**, i32*** [[PVT]],
// CK1: [[TT2:%.+]] = load i32*, i32** [[TT1]],
// CK1: getelementptr inbounds i32, i32* [[TT2]], i32 1
#pragma omp target data map(tr[:10]) use_device_ptr(tr)
{
++tr;
}
// CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE07]]
// CK1: [[TTT:%.+]] = load i32**, i32*** [[DECL]],
// CK1: [[TTTT:%.+]] = load i32*, i32** [[TTT]],
// CK1: getelementptr inbounds i32, i32* [[TTTT]], i32 1
++tr;
// CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8*
// CK1-DAG: [[T1]] = load float*, float** [[DECL:%.+]],
// CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32
// CK1: store i8* [[RVAL]], i8** [[BP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE08]]
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float**
// CK1: [[VAL:%.+]] = load float*, float** [[CBP]],
// CK1-NOT: store float* [[VAL]], float** [[DECL]],
// CK1: store float* [[VAL]], float** [[PVT:%.+]],
// CK1: [[TT1:%.+]] = load float*, float** [[PVT]],
// CK1: getelementptr inbounds float, float* [[TT1]], i32 1
#pragma omp target data map(l[:10], t[:10]) use_device_ptr(l)
{
++l; ++t;
}
// CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE08]]
// CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
// CK1: getelementptr inbounds float, float* [[TTT]], i32 1
++l; ++t;
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE09]]
// CK1: [[_CBP:%.+]] = bitcast i8** {{%.+}} to float**
// CK1: [[_VAL:%.+]] = load float*, float** [[_CBP]],
// CK1: store float* [[_VAL]], float** [[_PVT:%.+]],
// CK1: [[CBP:%.+]] = bitcast i8** {{%.+}} to i32**
// CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]],
// CK1: store i32* [[VAL]], i32** [[PVT:%.+]],
// CK1: [[_TT1:%.+]] = load float*, float** [[_PVT]],
// CK1: getelementptr inbounds float, float* [[_TT1]], i32 1
// CK1: [[TT1:%.+]] = load i32*, i32** [[PVT]],
// CK1: getelementptr inbounds i32, i32* [[TT1]], i32 1
#pragma omp target data map(l[:10], t[:10]) use_device_ptr(l) use_device_ptr(t)
{
++l; ++t;
}
// CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE09]]
// CK1: [[_TTT:%.+]] = load float*, float** {{%.+}},
// CK1: getelementptr inbounds float, float* [[_TTT]], i32 1
// CK1: [[TTT:%.+]] = load i32*, i32** {{%.+}},
// CK1: getelementptr inbounds i32, i32* [[TTT]], i32 1
++l; ++t;
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE10]]
// CK1: [[_CBP:%.+]] = bitcast i8** {{%.+}} to float**
// CK1: [[_VAL:%.+]] = load float*, float** [[_CBP]],
// CK1: store float* [[_VAL]], float** [[_PVT:%.+]],
// CK1: [[CBP:%.+]] = bitcast i8** {{%.+}} to i32**
// CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]],
// CK1: store i32* [[VAL]], i32** [[PVT:%.+]],
// CK1: [[_TT1:%.+]] = load float*, float** [[_PVT]],
// CK1: getelementptr inbounds float, float* [[_TT1]], i32 1
// CK1: [[TT1:%.+]] = load i32*, i32** [[PVT]],
// CK1: getelementptr inbounds i32, i32* [[TT1]], i32 1
#pragma omp target data map(l[:10], t[:10]) use_device_ptr(l,t)
{
++l; ++t;
}
// CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE10]]
// CK1: [[_TTT:%.+]] = load float*, float** {{%.+}},
// CK1: getelementptr inbounds float, float* [[_TTT]], i32 1
// CK1: [[TTT:%.+]] = load i32*, i32** {{%.+}},
// CK1: getelementptr inbounds i32, i32* [[TTT]], i32 1
++l; ++t;
// CK1-DAG: [[RVAL:%.+]] = bitcast i32* [[T1:%.+]] to i8*
// CK1-DAG: [[T1]] = load i32*, i32** [[DECL:%.+]],
// CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0
// CK1: store i8* [[RVAL]], i8** [[BP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE11]]
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32**
// CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]],
// CK1-NOT: store i32* [[VAL]], i32** [[DECL]],
// CK1: store i32* [[VAL]], i32** [[PVT:%.+]],
// CK1: [[TT1:%.+]] = load i32*, i32** [[PVT]],
// CK1: getelementptr inbounds i32, i32* [[TT1]], i32 1
#pragma omp target data map(l[:10]) use_device_ptr(t)
{
++l; ++t;
}
// CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE11]]
// CK1: [[TTT:%.+]] = load i32*, i32** [[DECL]],
// CK1: getelementptr inbounds i32, i32* [[TTT]], i32 1
++l; ++t;
// CK1-DAG: [[RVAL:%.+]] = bitcast i32* [[T1:%.+]] to i8*
// CK1-DAG: [[T1]] = load i32*, i32** [[T2:%.+]],
// CK1-DAG: [[T2]] = load i32**, i32*** [[DECL:%.+]],
// CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0
// CK1: store i8* [[RVAL]], i8** [[BP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE12]]
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32**
// CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]],
// CK1: store i32* [[VAL]], i32** [[PVTV:%.+]],
// CK1-NOT: store i32** [[PVTV]], i32*** [[DECL]],
// CK1: store i32** [[PVTV]], i32*** [[PVT:%.+]],
// CK1: [[TT1:%.+]] = load i32**, i32*** [[PVT]],
// CK1: [[TT2:%.+]] = load i32*, i32** [[TT1]],
// CK1: getelementptr inbounds i32, i32* [[TT2]], i32 1
#pragma omp target data map(l[:10]) use_device_ptr(tr)
{
++l; ++tr;
}
// CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE12]]
// CK1: [[TTT:%.+]] = load i32**, i32*** [[DECL]],
// CK1: [[TTTT:%.+]] = load i32*, i32** [[TTT]],
// CK1: getelementptr inbounds i32, i32* [[TTTT]], i32 1
++l; ++tr;
}
void bar(float *&a, int *&b) {
foo<int>(a,b);
}
#endif
///==========================================================================///
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
#ifdef CK2
// CK2: [[ST:%.+]] = type { double*, double** }
// CK2: [[MTYPE00:@.+]] = {{.*}}constant [2 x i32] [i32 35, i32 83]
// CK2: [[MTYPE01:@.+]] = {{.*}}constant [3 x i32] [i32 32, i32 19, i32 83]
// CK2: [[MTYPE02:@.+]] = {{.*}}constant [2 x i32] [i32 96, i32 35]
// CK2: [[MTYPE03:@.+]] = {{.*}}constant [4 x i32] [i32 96, i32 32, i32 19, i32 83]
template <typename T>
struct ST {
T *a;
double *&b;
ST(double *&b) : a(0), b(b) {}
// CK2-LABEL: @{{.*}}foo{{.*}}
void foo(double *&arg) {
int *la = 0;
// CK2: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1
// CK2: store i8* [[RVAL:%.+]], i8** [[BP]],
// CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE00]]
// CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double**
// CK2: [[VAL:%.+]] = load double*, double** [[CBP]],
// CK2: store double* [[VAL]], double** [[PVT:%.+]],
// CK2: store double** [[PVT]], double*** [[PVT2:%.+]],
// CK2: [[TT1:%.+]] = load double**, double*** [[PVT2]],
// CK2: [[TT2:%.+]] = load double*, double** [[TT1]],
// CK2: getelementptr inbounds double, double* [[TT2]], i32 1
#pragma omp target data map(a[:10]) use_device_ptr(a)
{
a++;
}
// CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE00]]
// CK2: [[DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %this1, i32 0, i32 0
// CK2: [[TTT:%.+]] = load double*, double** [[DECL]],
// CK2: getelementptr inbounds double, double* [[TTT]], i32 1
a++;
// CK2: [[BP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* %{{.+}}, i32 0, i32 2
// CK2: store i8* [[RVAL:%.+]], i8** [[BP]],
// CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE01]]
// CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double**
// CK2: [[VAL:%.+]] = load double*, double** [[CBP]],
// CK2: store double* [[VAL]], double** [[PVT:%.+]],
// CK2: store double** [[PVT]], double*** [[PVT2:%.+]],
// CK2: [[TT1:%.+]] = load double**, double*** [[PVT2]],
// CK2: [[TT2:%.+]] = load double*, double** [[TT1]],
// CK2: getelementptr inbounds double, double* [[TT2]], i32 1
#pragma omp target data map(b[:10]) use_device_ptr(b)
{
b++;
}
// CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE01]]
// CK2: [[DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %{{.+}}, i32 0, i32 1
// CK2: [[TTT:%.+]] = load double**, double*** [[DECL]],
// CK2: [[TTTT:%.+]] = load double*, double** [[TTT]],
// CK2: getelementptr inbounds double, double* [[TTTT]], i32 1
b++;
// CK2: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0
// CK2: store i8* [[RVAL:%.+]], i8** [[BP]],
// CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE02]]
// CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double**
// CK2: [[VAL:%.+]] = load double*, double** [[CBP]],
// CK2: store double* [[VAL]], double** [[PVT:%.+]],
// CK2: store double** [[PVT]], double*** [[PVT2:%.+]],
// CK2: [[TT1:%.+]] = load double**, double*** [[PVT2]],
// CK2: [[TT2:%.+]] = load double*, double** [[TT1]],
// CK2: getelementptr inbounds double, double* [[TT2]], i32 1
#pragma omp target data map(la[:10]) use_device_ptr(a)
{
a++;
la++;
}
// CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE02]]
// CK2: [[DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %this1, i32 0, i32 0
// CK2: [[TTT:%.+]] = load double*, double** [[DECL]],
// CK2: getelementptr inbounds double, double* [[TTT]], i32 1
a++;
la++;
// CK2: [[BP:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* %{{.+}}, i32 0, i32 0
// CK2: store i8* [[RVAL:%.+]], i8** [[BP]],
// CK2: [[_BP:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* %{{.+}}, i32 0, i32 3
// CK2: store i8* [[_RVAL:%.+]], i8** [[_BP]],
// CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE03]]
// CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double**
// CK2: [[VAL:%.+]] = load double*, double** [[CBP]],
// CK2: store double* [[VAL]], double** [[PVT:%.+]],
// CK2: store double** [[PVT]], double*** [[PVT2:%.+]],
// CK2: [[_CBP:%.+]] = bitcast i8** [[_BP]] to double**
// CK2: [[_VAL:%.+]] = load double*, double** [[_CBP]],
// CK2: store double* [[_VAL]], double** [[_PVT:%.+]],
// CK2: store double** [[_PVT]], double*** [[_PVT2:%.+]],
// CK2: [[TT1:%.+]] = load double**, double*** [[PVT2]],
// CK2: [[TT2:%.+]] = load double*, double** [[TT1]],
// CK2: getelementptr inbounds double, double* [[TT2]], i32 1
// CK2: [[_TT1:%.+]] = load double**, double*** [[_PVT2]],
// CK2: [[_TT2:%.+]] = load double*, double** [[_TT1]],
// CK2: getelementptr inbounds double, double* [[_TT2]], i32 1
#pragma omp target data map(b[:10]) use_device_ptr(a, b)
{
a++;
b++;
}
// CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE03]]
// CK2: [[DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %this1, i32 0, i32 0
// CK2: [[TTT:%.+]] = load double*, double** [[DECL]],
// CK2: getelementptr inbounds double, double* [[TTT]], i32 1
// CK2: [[_DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %this1, i32 0, i32 1
// CK2: [[_TTT:%.+]] = load double**, double*** [[_DECL]],
// CK2: [[_TTTT:%.+]] = load double*, double** [[_TTT]],
// CK2: getelementptr inbounds double, double* [[_TTTT]], i32 1
a++;
b++;
}
};
void bar(double *arg){
ST<double> A(arg);
A.foo(arg);
++arg;
}
#endif
#endif