forked from OSchip/llvm-project
Revert "[OPENMP][DEBUG] Set proper address space info if required by target."
This reverts commit r310360. llvm-svn: 310364
This commit is contained in:
parent
04f06d922f
commit
6a824b9a45
|
@ -2685,14 +2685,6 @@ def OMPCaptureNoInit : InheritableAttr {
|
|||
let Documentation = [Undocumented];
|
||||
}
|
||||
|
||||
def OMPCaptureKind : Attr {
|
||||
// This attribute has no spellings as it is only ever created implicitly.
|
||||
let Spellings = [];
|
||||
let SemaHandler = 0;
|
||||
let Args = [UnsignedArgument<"CaptureKind">];
|
||||
let Documentation = [Undocumented];
|
||||
}
|
||||
|
||||
def OMPDeclareSimdDecl : Attr {
|
||||
let Spellings = [Pragma<"omp", "declare simd">];
|
||||
let Subjects = SubjectList<[Function]>;
|
||||
|
|
|
@ -8527,11 +8527,6 @@ public:
|
|||
/// is performed.
|
||||
bool isOpenMPPrivateDecl(ValueDecl *D, unsigned Level);
|
||||
|
||||
/// Sets OpenMP capture kind (OMPC_private, OMPC_firstprivate, OMPC_map etc.)
|
||||
/// for \p FD based on DSA for the provided corresponding captured declaration
|
||||
/// \p D.
|
||||
void setOpenMPCaptureKind(FieldDecl *FD, ValueDecl *D, unsigned Level);
|
||||
|
||||
/// \brief Check if the specified variable is captured by 'target' directive.
|
||||
/// \param Level Relative level of nested OpenMP construct for that the check
|
||||
/// is performed.
|
||||
|
|
|
@ -1325,32 +1325,6 @@ public:
|
|||
virtual void emitDoacrossOrdered(CodeGenFunction &CGF,
|
||||
const OMPDependClause *C);
|
||||
|
||||
/// Translates the native parameter of outlined function if this is required
|
||||
/// for target.
|
||||
/// \param FD Field decl from captured record for the paramater.
|
||||
/// \param NativeParam Parameter itself.
|
||||
virtual const VarDecl *translateParameter(const FieldDecl *FD,
|
||||
const VarDecl *NativeParam) const {
|
||||
return NativeParam;
|
||||
}
|
||||
|
||||
typedef llvm::function_ref<void(CodeGenFunction &, const VarDecl *, Address)>
|
||||
MappingFnType;
|
||||
/// Maps the native argument to the address of the corresponding
|
||||
/// target-specific argument.
|
||||
/// \param FD Field decl from captured record for the paramater.
|
||||
/// \param NativeParam Parameter itself.
|
||||
/// \param TargetParam Corresponding target-specific parameter.
|
||||
/// \param MapFn Function that maps the native parameter to the address of the
|
||||
/// target-specific.
|
||||
virtual void mapParameterAddress(CodeGenFunction &CGF, const FieldDecl *FD,
|
||||
const VarDecl *NativeParam,
|
||||
const VarDecl *TargetParam,
|
||||
const MappingFnType) const {
|
||||
assert(NativeParam == TargetParam &&
|
||||
"native and target args must be the same");
|
||||
}
|
||||
|
||||
/// Emits call of the outlined function with the provided arguments,
|
||||
/// translating these arguments to correct target-specific arguments.
|
||||
virtual void
|
||||
|
|
|
@ -2238,81 +2238,3 @@ void CGOpenMPRuntimeNVPTX::emitReduction(
|
|||
CGF.EmitBranch(DefaultBB);
|
||||
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
|
||||
}
|
||||
|
||||
const VarDecl *
|
||||
CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD,
|
||||
const VarDecl *NativeParam) const {
|
||||
if (!NativeParam->getType()->isReferenceType())
|
||||
return NativeParam;
|
||||
QualType ArgType = NativeParam->getType();
|
||||
QualifierCollector QC;
|
||||
const Type *NonQualTy = QC.strip(ArgType);
|
||||
QualType PointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
|
||||
if (const auto *Attr = FD->getAttr<OMPCaptureKindAttr>()) {
|
||||
if (Attr->getCaptureKind() == OMPC_map) {
|
||||
PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy,
|
||||
LangAS::opencl_global);
|
||||
}
|
||||
}
|
||||
ArgType = CGM.getContext().getPointerType(PointeeTy);
|
||||
QC.addRestrict();
|
||||
enum { NVPTX_local_addr = 5 };
|
||||
QC.addAddressSpace(NVPTX_local_addr);
|
||||
ArgType = QC.apply(CGM.getContext(), ArgType);
|
||||
return ImplicitParamDecl::Create(
|
||||
CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(),
|
||||
NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other);
|
||||
}
|
||||
|
||||
void CGOpenMPRuntimeNVPTX::mapParameterAddress(
|
||||
CodeGenFunction &CGF, const FieldDecl *FD, const VarDecl *NativeParam,
|
||||
const VarDecl *TargetParam,
|
||||
const CGOpenMPRuntime::MappingFnType MapFn) const {
|
||||
assert(NativeParam != TargetParam &&
|
||||
NativeParam->getType()->isReferenceType() &&
|
||||
"Native arg must not be the same as target arg.");
|
||||
Address LocalAddr = CGF.GetAddrOfLocalVar(TargetParam);
|
||||
QualType NativeParamType = NativeParam->getType();
|
||||
QualifierCollector QC;
|
||||
const Type *NonQualTy = QC.strip(NativeParamType);
|
||||
QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
|
||||
unsigned NativePointeeAddrSpace =
|
||||
NativePointeeTy.getQualifiers().getAddressSpace();
|
||||
QualType TargetPointeeTy = TargetParam->getType()->getPointeeType();
|
||||
llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(
|
||||
LocalAddr, /*Volatile=*/false, TargetPointeeTy, SourceLocation());
|
||||
// First cast to generic.
|
||||
TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
||||
TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
|
||||
/*AddrSpace=*/0));
|
||||
// Cast from generic to native address space.
|
||||
TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
||||
TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
|
||||
NativePointeeAddrSpace));
|
||||
Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType);
|
||||
CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false,
|
||||
NativeParam->getType());
|
||||
MapFn(CGF, NativeParam, NativeParamAddr);
|
||||
}
|
||||
|
||||
void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall(
|
||||
CodeGenFunction &CGF, llvm::Value *OutlinedFn,
|
||||
ArrayRef<llvm::Value *> Args) const {
|
||||
SmallVector<llvm::Value *, 4> TargetArgs;
|
||||
auto *FnType =
|
||||
cast<llvm::FunctionType>(OutlinedFn->getType()->getPointerElementType());
|
||||
for (unsigned I = 0, E = Args.size(); I < E; ++I) {
|
||||
llvm::Type *TargetType = FnType->getParamType(I);
|
||||
llvm::Value *NativeArg = Args[I];
|
||||
if (!TargetType->isPointerTy()) {
|
||||
TargetArgs.emplace_back(NativeArg);
|
||||
continue;
|
||||
}
|
||||
llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
||||
NativeArg, NativeArg->getType()->getPointerElementType()->getPointerTo(
|
||||
/*AddrSpace=*/0));
|
||||
TargetArgs.emplace_back(
|
||||
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
|
||||
}
|
||||
CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, OutlinedFn, TargetArgs);
|
||||
}
|
||||
|
|
|
@ -268,31 +268,6 @@ public:
|
|||
/// \return Specified function.
|
||||
llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
|
||||
|
||||
/// Translates the native parameter of outlined function if this is required
|
||||
/// for target.
|
||||
/// \param FD Field decl from captured record for the paramater.
|
||||
/// \param NativeParam Parameter itself.
|
||||
const VarDecl *translateParameter(const FieldDecl *FD,
|
||||
const VarDecl *NativeParam) const override;
|
||||
|
||||
/// Maps the native argument to the address of the corresponding
|
||||
/// target-specific argument.
|
||||
/// \param FD Field decl from captured record for the paramater.
|
||||
/// \param NativeParam Parameter itself.
|
||||
/// \param TargetParam Corresponding target-specific parameter.
|
||||
/// \param MapFn Function that maps the native parameter to the address of the
|
||||
/// target-specific.
|
||||
void mapParameterAddress(CodeGenFunction &CGF, const FieldDecl *FD,
|
||||
const VarDecl *NativeParam,
|
||||
const VarDecl *TargetParam,
|
||||
const MappingFnType MapFn) const override;
|
||||
|
||||
/// Emits call of the outlined function with the provided arguments,
|
||||
/// translating these arguments to correct target-specific arguments.
|
||||
void emitOutlinedFunctionCall(
|
||||
CodeGenFunction &CGF, llvm::Value *OutlinedFn,
|
||||
ArrayRef<llvm::Value *> Args = llvm::None) const override;
|
||||
|
||||
/// Target codegen is specialized based on two programming models: the
|
||||
/// 'generic' fork-join model of OpenMP, and a more GPU efficient 'spmd'
|
||||
/// model for constructs like 'target parallel' that support it.
|
||||
|
|
|
@ -252,15 +252,12 @@ namespace {
|
|||
bool RegisterCastedArgsOnly = false;
|
||||
/// Name of the generated function.
|
||||
StringRef FunctionName;
|
||||
/// Function that maps given variable declaration to the specified address.
|
||||
const CGOpenMPRuntime::MappingFnType MapFn;
|
||||
explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
|
||||
bool RegisterCastedArgsOnly,
|
||||
StringRef FunctionName,
|
||||
const CGOpenMPRuntime::MappingFnType MapFn)
|
||||
StringRef FunctionName)
|
||||
: S(S), UIntPtrCastRequired(UIntPtrCastRequired),
|
||||
RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
|
||||
FunctionName(FunctionName), MapFn(MapFn) {}
|
||||
FunctionName(FunctionName) {}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -279,13 +276,9 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue(
|
|||
// Build the argument list.
|
||||
CodeGenModule &CGM = CGF.CGM;
|
||||
ASTContext &Ctx = CGM.getContext();
|
||||
FunctionArgList TargetArgs;
|
||||
bool HasUIntPtrArgs = false;
|
||||
Args.append(CD->param_begin(),
|
||||
std::next(CD->param_begin(), CD->getContextParamPosition()));
|
||||
TargetArgs.append(
|
||||
CD->param_begin(),
|
||||
std::next(CD->param_begin(), CD->getContextParamPosition()));
|
||||
auto I = FO.S->captures().begin();
|
||||
for (auto *FD : RD->fields()) {
|
||||
QualType ArgType = FD->getType();
|
||||
|
@ -315,28 +308,19 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue(
|
|||
}
|
||||
if (ArgType->isVariablyModifiedType())
|
||||
ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
|
||||
auto *Arg =
|
||||
ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
|
||||
ArgType, ImplicitParamDecl::Other);
|
||||
Args.emplace_back(Arg);
|
||||
// Do not cast arguments if we emit function with non-original types.
|
||||
TargetArgs.emplace_back(
|
||||
FO.UIntPtrCastRequired
|
||||
? Arg
|
||||
: CGM.getOpenMPRuntime().translateParameter(FD, Arg));
|
||||
Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr,
|
||||
FD->getLocation(), II, ArgType,
|
||||
ImplicitParamDecl::Other));
|
||||
++I;
|
||||
}
|
||||
Args.append(
|
||||
std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
|
||||
CD->param_end());
|
||||
TargetArgs.append(
|
||||
std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
|
||||
CD->param_end());
|
||||
|
||||
// Create the function declaration.
|
||||
FunctionType::ExtInfo ExtInfo;
|
||||
const CGFunctionInfo &FuncInfo =
|
||||
CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
|
||||
CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
|
||||
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
|
||||
|
||||
llvm::Function *F =
|
||||
|
@ -347,21 +331,16 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue(
|
|||
F->setDoesNotThrow();
|
||||
|
||||
// Generate the function.
|
||||
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, CD->getLocation(),
|
||||
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
|
||||
CD->getBody()->getLocStart());
|
||||
unsigned Cnt = CD->getContextParamPosition();
|
||||
I = FO.S->captures().begin();
|
||||
for (auto *FD : RD->fields()) {
|
||||
// Do not map arguments if we emit function with non-original types.
|
||||
if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
|
||||
CGM.getOpenMPRuntime().mapParameterAddress(CGF, FD, Args[Cnt],
|
||||
TargetArgs[Cnt], FO.MapFn);
|
||||
}
|
||||
Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
|
||||
// If we are capturing a pointer by copy we don't need to do anything, just
|
||||
// use the value that we get from the arguments.
|
||||
if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
|
||||
const VarDecl *CurVD = I->getCapturedVar();
|
||||
Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
|
||||
// If the variable is a reference we need to materialize it here.
|
||||
if (CurVD->getType()->isReferenceType()) {
|
||||
Address RefAddr = CGF.CreateMemTemp(
|
||||
|
@ -378,8 +357,8 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue(
|
|||
}
|
||||
|
||||
LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
|
||||
LValue ArgLVal =
|
||||
CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), BaseInfo);
|
||||
LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]),
|
||||
Args[Cnt]->getType(), BaseInfo);
|
||||
if (FD->hasCapturedVLAType()) {
|
||||
if (FO.UIntPtrCastRequired) {
|
||||
ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
|
||||
|
@ -449,15 +428,8 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
|
|||
FunctionArgList Args;
|
||||
llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
|
||||
llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
|
||||
FunctionOptions FO(
|
||||
&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
|
||||
CapturedStmtInfo->getHelperName(),
|
||||
[NeedWrapperFunction](CodeGenFunction &CGF, const VarDecl *VD,
|
||||
Address Addr) {
|
||||
assert(NeedWrapperFunction && "Function should not be called if "
|
||||
"wrapper function is not required.");
|
||||
CGF.setAddrOfLocalVar(VD, Addr);
|
||||
});
|
||||
FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
|
||||
CapturedStmtInfo->getHelperName());
|
||||
llvm::Function *F;
|
||||
bool HasUIntPtrArgs;
|
||||
std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue(
|
||||
|
@ -480,10 +452,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
|
|||
llvm::raw_svector_ostream Out(Buffer);
|
||||
Out << "__nondebug_wrapper_" << CapturedStmtInfo->getHelperName();
|
||||
FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
|
||||
/*RegisterCastedArgsOnly=*/true, Out.str(),
|
||||
[](CodeGenFunction &, const VarDecl *, Address) {
|
||||
llvm_unreachable("Function should not be called");
|
||||
});
|
||||
/*RegisterCastedArgsOnly=*/true, Out.str());
|
||||
CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
|
||||
WrapperCGF.disableDebugInfo();
|
||||
Args.clear();
|
||||
|
|
|
@ -14013,8 +14013,6 @@ static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI,
|
|||
Field->setImplicit(true);
|
||||
Field->setAccess(AS_private);
|
||||
RD->addDecl(Field);
|
||||
if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP)
|
||||
S.setOpenMPCaptureKind(Field, Var, RSI->OpenMPLevel);
|
||||
|
||||
CopyExpr = new (S.Context) DeclRefExpr(Var, RefersToCapturedVariable,
|
||||
DeclRefType, VK_LValue, Loc);
|
||||
|
|
|
@ -1327,39 +1327,6 @@ bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) {
|
|||
DSAStack->isTaskgroupReductionRef(D, Level));
|
||||
}
|
||||
|
||||
void Sema::setOpenMPCaptureKind(FieldDecl *FD, ValueDecl *D, unsigned Level) {
|
||||
assert(LangOpts.OpenMP && "OpenMP is not allowed");
|
||||
D = getCanonicalDecl(D);
|
||||
OpenMPClauseKind OMPC = OMPC_unknown;
|
||||
for (unsigned I = DSAStack->getNestingLevel() + 1; I > Level; --I) {
|
||||
const unsigned NewLevel = I - 1;
|
||||
if (DSAStack->hasExplicitDSA(D,
|
||||
[&OMPC](const OpenMPClauseKind K) {
|
||||
if (isOpenMPPrivate(K)) {
|
||||
OMPC = K;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
},
|
||||
NewLevel))
|
||||
break;
|
||||
if (DSAStack->checkMappableExprComponentListsForDeclAtLevel(
|
||||
D, NewLevel,
|
||||
[](OMPClauseMappableExprCommon::MappableExprComponentListRef,
|
||||
OpenMPClauseKind) { return true; })) {
|
||||
OMPC = OMPC_map;
|
||||
break;
|
||||
}
|
||||
if (DSAStack->hasExplicitDirective(isOpenMPTargetExecutionDirective,
|
||||
NewLevel)) {
|
||||
OMPC = OMPC_firstprivate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (OMPC != OMPC_unknown)
|
||||
FD->addAttr(OMPCaptureKindAttr::CreateImplicit(Context, OMPC));
|
||||
}
|
||||
|
||||
bool Sema::isOpenMPTargetCapturedDecl(ValueDecl *D, unsigned Level) {
|
||||
assert(LangOpts.OpenMP && "OpenMP is not allowed");
|
||||
// Return true if the current level is no longer enclosed in a target region.
|
||||
|
|
|
@ -1,14 +1,15 @@
|
|||
|
||||
// Test target codegen - host bc file has to be created first.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
|
||||
// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
template <typename tx, typename ty>
|
||||
struct TT {
|
||||
template<typename tx, typename ty>
|
||||
struct TT{
|
||||
tx X;
|
||||
ty Y;
|
||||
};
|
||||
|
@ -22,32 +23,29 @@ int foo(int n, double *ptr) {
|
|||
float b[10];
|
||||
double c[5][10];
|
||||
TT<long long, char> d;
|
||||
|
||||
#pragma omp target firstprivate(a) map(tofrom \
|
||||
: b)
|
||||
|
||||
#pragma omp target firstprivate(a)
|
||||
{
|
||||
b[a] = a;
|
||||
}
|
||||
|
||||
// TCHECK: define {{.*}}void @__omp_offloading_{{.+}}([10 x float] addrspace(1)* noalias [[B_IN:%.+]], i{{[0-9]+}} [[A_IN:%.+]])
|
||||
|
||||
// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]])
|
||||
// TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// TCHECK-NOT: alloca i{{[0-9]+}},
|
||||
// TCHECK-64: call void @llvm.dbg.declare(metadata [10 x float] addrspace(1)** %{{.+}}, metadata !{{[0-9]+}}, metadata ![[LOCAL:[0-9]+]])
|
||||
// TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]],
|
||||
// TCHECK: ret void
|
||||
// TCHECK: ret void
|
||||
|
||||
#pragma omp target firstprivate(aa, b, c, d)
|
||||
#pragma omp target firstprivate(aa,b,c,d)
|
||||
{
|
||||
aa += 1;
|
||||
b[2] = 1.0;
|
||||
c[1][2] = 1.0;
|
||||
d.X = 1;
|
||||
d.Y = 1;
|
||||
d.Y = 1;
|
||||
}
|
||||
|
||||
|
||||
// make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the
|
||||
// target region
|
||||
// TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}}{{.*}} [[A2_IN:%.+]], [10 x float]*{{.*}} [[B_IN:%.+]], [5 x [10 x double]]*{{.*}} [[C_IN:%.+]], [[TT]]*{{.*}} [[D_IN:%.+]])
|
||||
// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]])
|
||||
// TCHECK: [[A2_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// TCHECK: [[B_ADDR:%.+]] = alloca [10 x float]*,
|
||||
// TCHECK: [[C_ADDR:%.+]] = alloca [5 x [10 x double]]*,
|
||||
|
@ -60,12 +58,10 @@ int foo(int n, double *ptr) {
|
|||
// TCHECK: store [10 x float]* [[B_IN]], [10 x float]** [[B_ADDR]],
|
||||
// TCHECK: store [5 x [10 x double]]* [[C_IN]], [5 x [10 x double]]** [[C_ADDR]],
|
||||
// TCHECK: store [[TT]]* [[D_IN]], [[TT]]** [[D_ADDR]],
|
||||
// TCHECK: [[CONV_A2ADDR:%.+]] = bitcast i{{[0-9]+}}* [[A2_ADDR]] to i{{[0-9]+}}*
|
||||
// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** [[B_ADDR]],
|
||||
// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** %
|
||||
// TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]],
|
||||
// TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** %
|
||||
// TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** [[D_ADDR]],
|
||||
// TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** %
|
||||
|
||||
// firstprivate(aa): a_priv = a_in
|
||||
|
||||
|
@ -78,15 +74,16 @@ int foo(int n, double *ptr) {
|
|||
// TCHECK: [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8*
|
||||
// TCHECK: [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8*
|
||||
// TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}})
|
||||
|
||||
|
||||
// firstprivate(d)
|
||||
// TCHECK: [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8*
|
||||
// TCHECK: [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8*
|
||||
// TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[D_PRIV_BCAST]], i8* [[D_IN_BCAST]],{{.+}})
|
||||
|
||||
// TCHECK: load i16, i16* [[A2_ADDR]],
|
||||
// TCHECK: load i16, i16* [[CONV_A2ADDR]],
|
||||
|
||||
#pragma omp target firstprivate(ptr)
|
||||
|
||||
#pragma omp target firstprivate(ptr)
|
||||
{
|
||||
ptr[0]++;
|
||||
}
|
||||
|
@ -101,12 +98,13 @@ int foo(int n, double *ptr) {
|
|||
return a;
|
||||
}
|
||||
|
||||
template <typename tx>
|
||||
|
||||
template<typename tx>
|
||||
tx ftemplate(int n) {
|
||||
tx a = 0;
|
||||
tx b[10];
|
||||
|
||||
#pragma omp target firstprivate(a, b)
|
||||
#pragma omp target firstprivate(a,b)
|
||||
{
|
||||
a += 1;
|
||||
b[2] += 1;
|
||||
|
@ -115,12 +113,13 @@ tx ftemplate(int n) {
|
|||
return a;
|
||||
}
|
||||
|
||||
static int fstatic(int n) {
|
||||
static
|
||||
int fstatic(int n) {
|
||||
int a = 0;
|
||||
char aaa = 0;
|
||||
int b[10];
|
||||
|
||||
#pragma omp target firstprivate(a, aaa, b)
|
||||
#pragma omp target firstprivate(a,aaa,b)
|
||||
{
|
||||
a += 1;
|
||||
aaa += 1;
|
||||
|
@ -130,7 +129,7 @@ static int fstatic(int n) {
|
|||
return a;
|
||||
}
|
||||
|
||||
// TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}}{{.*}} [[A_IN:%.+]], i{{[0-9]+}}{{.*}} [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
|
||||
// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
|
||||
// TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// TCHECK: [[A3_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*,
|
||||
|
@ -139,8 +138,9 @@ static int fstatic(int n) {
|
|||
// TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]],
|
||||
// TCHECK: store i{{[0-9]+}} [[A3_IN]], i{{[0-9]+}}* [[A3_ADDR]],
|
||||
// TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]],
|
||||
// TCHECK-64: [[A_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}*
|
||||
// TCHECK: [[A3_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A3_ADDR]] to i8*
|
||||
// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]],
|
||||
// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** %
|
||||
|
||||
// firstprivate(a): a_priv = a_in
|
||||
|
||||
|
@ -158,8 +158,8 @@ static int fstatic(int n) {
|
|||
struct S1 {
|
||||
double a;
|
||||
|
||||
int r1(int n) {
|
||||
int b = n + 1;
|
||||
int r1(int n){
|
||||
int b = n+1;
|
||||
|
||||
#pragma omp target firstprivate(b)
|
||||
{
|
||||
|
@ -169,7 +169,7 @@ struct S1 {
|
|||
return (int)b;
|
||||
}
|
||||
|
||||
// TCHECK: define internal void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]])
|
||||
// TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]])
|
||||
// TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*,
|
||||
// TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// TCHECK-NOT: alloca i{{[0-9]+}},
|
||||
|
@ -185,7 +185,9 @@ struct S1 {
|
|||
// TCHECK: ret void
|
||||
};
|
||||
|
||||
int bar(int n, double *ptr) {
|
||||
|
||||
|
||||
int bar(int n, double *ptr){
|
||||
int a = 0;
|
||||
a += foo(n, ptr);
|
||||
S1 S;
|
||||
|
@ -198,15 +200,15 @@ int bar(int n, double *ptr) {
|
|||
|
||||
// template
|
||||
|
||||
// TCHECK: define internal void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
|
||||
// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
|
||||
// TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*,
|
||||
// TCHECK-NOT: alloca i{{[0-9]+}},
|
||||
// TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}],
|
||||
// TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]],
|
||||
// TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]],
|
||||
// TCHECK-64: [[A_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}*
|
||||
// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]],
|
||||
// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** %
|
||||
|
||||
// firstprivate(a)
|
||||
// TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}*
|
||||
|
|
|
@ -1,97 +0,0 @@
|
|||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
int main() {
|
||||
/* int(*b)[a]; */
|
||||
/* int *(**c)[a]; */
|
||||
int a;
|
||||
int b[10][10];
|
||||
int c[10][10][10];
|
||||
#pragma omp target parallel firstprivate(a, b) map(tofrom \
|
||||
: c)
|
||||
{
|
||||
int &f = c[1][1][1];
|
||||
int &g = a;
|
||||
int &h = b[1][1];
|
||||
int d = 15;
|
||||
a = 5;
|
||||
b[0][a] = 10;
|
||||
c[0][0][a] = 11;
|
||||
b[0][a] = c[0][0][a];
|
||||
}
|
||||
#pragma omp target parallel firstprivate(a) map(tofrom \
|
||||
: c, b)
|
||||
{
|
||||
int &f = c[1][1][1];
|
||||
int &g = a;
|
||||
int &h = b[1][1];
|
||||
int d = 15;
|
||||
a = 5;
|
||||
b[0][a] = 10;
|
||||
c[0][0][a] = 11;
|
||||
b[0][a] = c[0][0][a];
|
||||
}
|
||||
#pragma omp target parallel map(tofrom \
|
||||
: a, c, b)
|
||||
{
|
||||
int &f = c[1][1][1];
|
||||
int &g = a;
|
||||
int &h = b[1][1];
|
||||
int d = 15;
|
||||
a = 5;
|
||||
b[0][a] = 10;
|
||||
c[0][0][a] = 11;
|
||||
b[0][a] = c[0][0][a];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}})
|
||||
// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
|
||||
// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}})
|
||||
|
||||
// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* noalias{{[^)]+}})
|
||||
// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
|
||||
|
||||
// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}})
|
||||
// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
|
||||
// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}})
|
||||
|
||||
// CHECK: define void @__nondebug_wrapper___omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}})
|
||||
// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
|
||||
// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}})
|
||||
|
||||
// CHECK: define internal void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias {{[^)]+}})
|
||||
// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
|
||||
// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]*
|
||||
// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}})
|
||||
|
||||
// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^)]+}})
|
||||
// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
|
||||
// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]*
|
||||
|
||||
// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}})
|
||||
// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
|
||||
// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)*
|
||||
// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}})
|
||||
|
||||
// CHECK: define void @__nondebug_wrapper___omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}})
|
||||
// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
|
||||
// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)*
|
||||
// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}})
|
||||
|
||||
// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias {{[^,]+}}, i32 addrspace(1)* noalias {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias {{[^)]+}})
|
||||
// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
|
||||
// CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32*
|
||||
// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]*
|
||||
// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
|
||||
// CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)*
|
||||
// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)*
|
||||
// CHECK: call void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}})
|
||||
|
||||
// CHECK: define internal void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^)]+}})
|
||||
// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
|
||||
// CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32*
|
||||
// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]*
|
||||
|
Loading…
Reference in New Issue