diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 1f732f67517a..b2ea8bf8e39f 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -3192,14 +3192,17 @@ public: /// \brief Build 'device' clause. /// /// \param E Expression associated with this clause. + /// \param CaptureRegion Innermost OpenMP region where expressions in this + /// clause must be captured. /// \param StartLoc Starting location of the clause. /// \param LParenLoc Location of '('. /// \param EndLoc Ending location of the clause. - OMPDeviceClause(Expr *E, Stmt *HelperE, SourceLocation StartLoc, - SourceLocation LParenLoc, SourceLocation EndLoc) + OMPDeviceClause(Expr *E, Stmt *HelperE, OpenMPDirectiveKind CaptureRegion, + SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc) : OMPClause(OMPC_device, StartLoc, EndLoc), OMPClauseWithPreInit(this), LParenLoc(LParenLoc), Device(E) { - setPreInitStmt(HelperE); + setPreInitStmt(HelperE, CaptureRegion); } /// \brief Build an empty clause. diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 5bb50e594ac2..739fc914a318 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -891,6 +891,7 @@ void clang::getOpenMPCaptureRegions( case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: + CaptureRegions.push_back(OMPD_task); CaptureRegions.push_back(OMPD_target); CaptureRegions.push_back(OMPD_teams); break; @@ -901,6 +902,7 @@ void clang::getOpenMPCaptureRegions( break; case OMPD_target: case OMPD_target_simd: + CaptureRegions.push_back(OMPD_task); CaptureRegions.push_back(OMPD_target); break; case OMPD_teams_distribute_parallel_for: @@ -911,6 +913,7 @@ void clang::getOpenMPCaptureRegions( case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: + CaptureRegions.push_back(OMPD_task); CaptureRegions.push_back(OMPD_target); CaptureRegions.push_back(OMPD_parallel); break; @@ -925,6 +928,7 @@ void clang::getOpenMPCaptureRegions( CaptureRegions.push_back(OMPD_taskloop); break; case OMPD_target_teams_distribute_parallel_for: + CaptureRegions.push_back(OMPD_task); CaptureRegions.push_back(OMPD_target); CaptureRegions.push_back(OMPD_teams); CaptureRegions.push_back(OMPD_parallel); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 9cff2a35784d..88ff4120026c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4187,6 +4187,11 @@ static void emitPrivatesInit(CodeGenFunction &CGF, auto &C = CGF.getContext(); auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); + OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) + ? OMPD_taskloop + : OMPD_task; + const CapturedStmt &CS = *D.getCapturedStmt(Kind); + CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); LValue SrcBase; bool IsTargetTask = isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || @@ -4195,16 +4200,12 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // PointersArray and SizesArray. The original variables for these arrays are // not captured and we get their addresses explicitly. if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || - (IsTargetTask && Data.FirstprivateVars.size() > 3)) { + (IsTargetTask && KmpTaskSharedsPtr.isValid())) { SrcBase = CGF.MakeAddrLValue( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), SharedsTy); } - OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) - ? OMPD_taskloop - : OMPD_task; - CodeGenFunction::CGCapturedStmtInfo CapturesInfo(*D.getCapturedStmt(Kind)); FI = cast(FI->getType()->getAsTagDecl())->field_begin(); for (auto &&Pair : Privates) { auto *VD = Pair.second.PrivateCopy; @@ -4218,17 +4219,19 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // PointersArray or SizesArray. LValue SharedRefLValue; QualType Type = OriginalVD->getType(); - if (IsTargetTask && isa(OriginalVD) && - isa(OriginalVD->getDeclContext()) && - cast(OriginalVD->getDeclContext())->getNumParams() == - 0 && - isa( - cast(OriginalVD->getDeclContext()) - ->getDeclContext())) { + auto *SharedField = CapturesInfo.lookup(OriginalVD); + if (IsTargetTask && !SharedField) { + assert(isa(OriginalVD) && + isa(OriginalVD->getDeclContext()) && + cast(OriginalVD->getDeclContext()) + ->getNumParams() == 0 && + isa( + cast(OriginalVD->getDeclContext()) + ->getDeclContext()) && + "Expected artificial target data variable."); SharedRefLValue = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); } else { - auto *SharedField = CapturesInfo.lookup(OriginalVD); SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), @@ -7040,86 +7043,27 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device, - ArrayRef CapturedVars) { + const Expr *IfCond, const Expr *Device) { if (!CGF.HaveInsertPoint()) return; assert(OutlinedFn && "Invalid outlined function!"); - // Fill up the arrays with all the captured variables. - MappableExprsHandler::MapValuesArrayTy KernelArgs; - MappableExprsHandler::MapBaseValuesArrayTy BasePointers; - MappableExprsHandler::MapValuesArrayTy Pointers; - MappableExprsHandler::MapValuesArrayTy Sizes; - MappableExprsHandler::MapFlagsArrayTy MapTypes; - - MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; - MappableExprsHandler::MapValuesArrayTy CurPointers; - MappableExprsHandler::MapValuesArrayTy CurSizes; - MappableExprsHandler::MapFlagsArrayTy CurMapTypes; - - // Get mappable expression information. - MappableExprsHandler MEHandler(D, CGF); - + const bool RequiresOuterTask = D.hasClausesOfKind(); + llvm::SmallVector CapturedVars; const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - auto RI = CS.getCapturedRecordDecl()->field_begin(); - auto CV = CapturedVars.begin(); - for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), - CE = CS.capture_end(); - CI != CE; ++CI, ++RI, ++CV) { - CurBasePointers.clear(); - CurPointers.clear(); - CurSizes.clear(); - CurMapTypes.clear(); - - // VLA sizes are passed to the outlined region by copy and do not have map - // information associated. - if (CI->capturesVariableArrayType()) { - CurBasePointers.push_back(*CV); - CurPointers.push_back(*CV); - CurSizes.push_back(CGF.getTypeSize(RI->getType())); - // Copy to the device as an argument. No need to retrieve it. - CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | - MappableExprsHandler::OMP_MAP_TARGET_PARAM); - } else { - // If we have any information in the map clause, we use it, otherwise we - // just do a default mapping. - MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, - CurSizes, CurMapTypes); - if (CurBasePointers.empty()) - MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, - CurPointers, CurSizes, CurMapTypes); - } - // We expect to have at least an element of information for this capture. - assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!"); - assert(CurBasePointers.size() == CurPointers.size() && - CurBasePointers.size() == CurSizes.size() && - CurBasePointers.size() == CurMapTypes.size() && - "Inconsistent map information sizes!"); - - // The kernel args are always the first elements of the base pointers - // associated with a capture. - KernelArgs.push_back(*CurBasePointers.front()); - // We need to append the results of this capture to what we already have. - BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); - Pointers.append(CurPointers.begin(), CurPointers.end()); - Sizes.append(CurSizes.begin(), CurSizes.end()); - MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); - } + auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + }; + emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); + CodeGenFunction::OMPTargetDataInfo InputInfo; + llvm::Value *MapTypesArray = nullptr; // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, - OutlinedFn, OutlinedFnID, &D, - &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) { - auto &RT = CGF.CGM.getOpenMPRuntime(); - // Emit the offloading arrays. - TargetDataInfo Info; - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); - emitOffloadingArraysArgument(CGF, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info); - + auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, + &MapTypesArray, &CS, RequiresOuterTask, + &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. The host // pointer is used by the runtime library to identify the current target @@ -7142,13 +7086,14 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, } // Emit the number of elements in the offloading arrays. - llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); + llvm::Value *PointerNum = + CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); // Return value of the runtime offloading call. llvm::Value *Return; - auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); - auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); + auto *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D); + auto *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D); bool HasNowait = D.hasClausesOfKind(); // The target region is an outlined function launched by the runtime @@ -7186,25 +7131,30 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // passed to the runtime library - a 32-bit integer with the value zero. assert(NumThreads && "Thread limit expression should be available along " "with number of teams."); - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, - PointerNum, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, NumTeams, - NumThreads}; + llvm::Value *OffloadingArgs[] = {DeviceID, + OutlinedFnID, + PointerNum, + InputInfo.BasePointersArray.getPointer(), + InputInfo.PointersArray.getPointer(), + InputInfo.SizesArray.getPointer(), + MapTypesArray, + NumTeams, + NumThreads}; Return = CGF.EmitRuntimeCall( - RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait - : OMPRTL__tgt_target_teams), + createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait + : OMPRTL__tgt_target_teams), OffloadingArgs); } else { - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, - PointerNum, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray}; + llvm::Value *OffloadingArgs[] = {DeviceID, + OutlinedFnID, + PointerNum, + InputInfo.BasePointersArray.getPointer(), + InputInfo.PointersArray.getPointer(), + InputInfo.SizesArray.getPointer(), + MapTypesArray}; Return = CGF.EmitRuntimeCall( - RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait - : OMPRTL__tgt_target), + createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait + : OMPRTL__tgt_target), OffloadingArgs); } @@ -7217,17 +7167,114 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); CGF.EmitBlock(OffloadFailedBlock); - emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs); + if (RequiresOuterTask) { + CapturedVars.clear(); + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + } + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars); CGF.EmitBranch(OffloadContBlock); CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); }; // Notify that the host version must be executed. - auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF, - PrePostActionTy &) { - emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, - KernelArgs); + auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, + RequiresOuterTask](CodeGenFunction &CGF, + PrePostActionTy &) { + if (RequiresOuterTask) { + CapturedVars.clear(); + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + } + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars); + }; + + auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, + &CapturedVars, RequiresOuterTask, + &CS](CodeGenFunction &CGF, PrePostActionTy &) { + // Fill up the arrays with all the captured variables. + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; + + MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; + MappableExprsHandler::MapValuesArrayTy CurPointers; + MappableExprsHandler::MapValuesArrayTy CurSizes; + MappableExprsHandler::MapFlagsArrayTy CurMapTypes; + + // Get mappable expression information. + MappableExprsHandler MEHandler(D, CGF); + + auto RI = CS.getCapturedRecordDecl()->field_begin(); + auto CV = CapturedVars.begin(); + for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), + CE = CS.capture_end(); + CI != CE; ++CI, ++RI, ++CV) { + CurBasePointers.clear(); + CurPointers.clear(); + CurSizes.clear(); + CurMapTypes.clear(); + + // VLA sizes are passed to the outlined region by copy and do not have map + // information associated. + if (CI->capturesVariableArrayType()) { + CurBasePointers.push_back(*CV); + CurPointers.push_back(*CV); + CurSizes.push_back(CGF.getTypeSize(RI->getType())); + // Copy to the device as an argument. No need to retrieve it. + CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | + MappableExprsHandler::OMP_MAP_TARGET_PARAM); + } else { + // If we have any information in the map clause, we use it, otherwise we + // just do a default mapping. + MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, + CurSizes, CurMapTypes); + if (CurBasePointers.empty()) + MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, + CurPointers, CurSizes, CurMapTypes); + } + // We expect to have at least an element of information for this capture. + assert(!CurBasePointers.empty() && + "Non-existing map pointer for capture!"); + assert(CurBasePointers.size() == CurPointers.size() && + CurBasePointers.size() == CurSizes.size() && + CurBasePointers.size() == CurMapTypes.size() && + "Inconsistent map information sizes!"); + + // We need to append the results of this capture to what we already have. + BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); + Pointers.append(CurPointers.begin(), CurPointers.end()); + Sizes.append(CurSizes.begin(), CurSizes.end()); + MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); + } + + TargetDataInfo Info; + // Fill up the arrays and create the arguments. + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitOffloadingArraysArgument(CGF, Info.BasePointersArray, + Info.PointersArray, Info.SizesArray, + Info.MapTypesArray, Info); + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; + InputInfo.BasePointersArray = + Address(Info.BasePointersArray, CGM.getPointerAlign()); + InputInfo.PointersArray = + Address(Info.PointersArray, CGM.getPointerAlign()); + InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); + MapTypesArray = Info.MapTypesArray; + if (RequiresOuterTask) + CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); + else + emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); + }; + + auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( + CodeGenFunction &CGF, PrePostActionTy &) { + if (RequiresOuterTask) { + CodeGenFunction::OMPTargetDataInfo InputInfo; + CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); + } else { + emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); + } }; // If we have a target function ID it means that we need to support @@ -7235,14 +7282,14 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // regardless of the conditional in the if clause if, e.g., the user do not // specify target triples. if (OutlinedFnID) { - if (IfCond) - emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - else { - RegionCodeGenTy ThenRCG(ThenGen); + if (IfCond) { + emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); + } else { + RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); } } else { - RegionCodeGenTy ElseRCG(ElseGen); + RegionCodeGenTy ElseRCG(TargetElseGen); ElseRCG(CGF); } } @@ -8260,8 +8307,7 @@ void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device, - ArrayRef CapturedVars) { + const Expr *IfCond, const Expr *Device) { llvm_unreachable("Not supported in SIMD-only mode"); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 706b48c235d8..e1ba8334368e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1207,13 +1207,11 @@ public: /// directive, or null if no if clause is used. /// \param Device Expression evaluated in device clause associated with the /// target directive, or null if no device clause is used. - /// \param CapturedVars Values captured in the current region. virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, - const Expr *Device, - ArrayRef CapturedVars); + const Expr *Device); /// \brief Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if @@ -1833,11 +1831,9 @@ public: /// directive, or null if no if clause is used. /// \param Device Expression evaluated in device clause associated with the /// target directive, or null if no device clause is used. - /// \param CapturedVars Values captured in the current region. void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device, - ArrayRef CapturedVars) override; + const Expr *IfCond, const Expr *Device) override; /// \brief Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 116647d7cfa5..c6a0cdb5a549 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3091,12 +3091,14 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } // Privatize all private variables except for in_reduction items. (void)Scope.Privatize(); - InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( - CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); - InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( - CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); - InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( - CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); + if (InputInfo.NumberOfTargetItems > 0) { + InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); + InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); + InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); + } Action.Enter(CGF); OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); @@ -3910,7 +3912,6 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen) { assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); CodeGenModule &CGM = CGF.CGM; - const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target); llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; @@ -3958,11 +3959,8 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, // Emit target region as a standalone region. CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, IsOffloadEntry, CodeGen); - OMPLexicalScope Scope(CGF, S); - llvm::SmallVector CapturedVars; - CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); - CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, - CapturedVars); + OMPLexicalScope Scope(CGF, S, OMPD_task); + CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device); } static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 8c7111fe646b..4f45d6d1a3b6 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -2121,13 +2121,28 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { case OMPD_target_parallel_for_simd: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: { + QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); + QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()}; + FunctionProtoType::ExtProtoInfo EPI; + EPI.Variadic = true; + QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); + Sema::CapturedParamNameType Params[] = { + std::make_pair(".global_tid.", KmpInt32Ty), + std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)), + std::make_pair(".privates.", Context.VoidPtrTy.withConst()), + std::make_pair(".copy_fn.", + Context.getPointerType(CopyFnType).withConst()), + std::make_pair(".task_t.", Context.VoidPtrTy.withConst()), + std::make_pair(StringRef(), QualType()) // __context with shared vars + }; + ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, + Params); Sema::CapturedParamNameType ParamsTarget[] = { std::make_pair(StringRef(), QualType()) // __context with shared vars }; // Start a captured region for 'target' with no implicit parameters. ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, ParamsTarget); - QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty).withConst().withRestrict(); Sema::CapturedParamNameType ParamsTeamsOrParallel[] = { @@ -2141,6 +2156,33 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { ParamsTeamsOrParallel); break; } + case OMPD_target: + case OMPD_target_simd: { + QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); + QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()}; + FunctionProtoType::ExtProtoInfo EPI; + EPI.Variadic = true; + QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); + Sema::CapturedParamNameType Params[] = { + std::make_pair(".global_tid.", KmpInt32Ty), + std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)), + std::make_pair(".privates.", Context.VoidPtrTy.withConst()), + std::make_pair(".copy_fn.", + Context.getPointerType(CopyFnType).withConst()), + std::make_pair(".task_t.", Context.VoidPtrTy.withConst()), + std::make_pair(StringRef(), QualType()) // __context with shared vars + }; + ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, + Params); + // Mark this captured region as inlined, because we don't use outlined + // function directly. + getCurCapturedRegion()->TheCapturedDecl->addAttr( + AlwaysInlineAttr::CreateImplicit( + Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange())); + ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, + std::make_pair(StringRef(), QualType())); + break; + } case OMPD_simd: case OMPD_for: case OMPD_for_simd: @@ -2154,9 +2196,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { case OMPD_distribute_simd: case OMPD_ordered: case OMPD_atomic: - case OMPD_target_data: - case OMPD_target: - case OMPD_target_simd: { + case OMPD_target_data: { Sema::CapturedParamNameType Params[] = { std::make_pair(StringRef(), QualType()) // __context with shared vars }; @@ -2247,6 +2287,21 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty).withConst().withRestrict(); + QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()}; + FunctionProtoType::ExtProtoInfo EPI; + EPI.Variadic = true; + QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); + Sema::CapturedParamNameType Params[] = { + std::make_pair(".global_tid.", KmpInt32Ty), + std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)), + std::make_pair(".privates.", Context.VoidPtrTy.withConst()), + std::make_pair(".copy_fn.", + Context.getPointerType(CopyFnType).withConst()), + std::make_pair(".task_t.", Context.VoidPtrTy.withConst()), + std::make_pair(StringRef(), QualType()) // __context with shared vars + }; + ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, + Params); Sema::CapturedParamNameType ParamsTarget[] = { std::make_pair(StringRef(), QualType()) // __context with shared vars }; @@ -6354,13 +6409,23 @@ StmtResult Sema::ActOnOpenMPTargetDirective(ArrayRef Clauses, // The point of exit cannot be a branch out of the structured block. // longjmp() and throw() must not violate the entry/exit criteria. CS->getCapturedDecl()->setNothrow(); + for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target); + ThisCaptureLevel > 1; --ThisCaptureLevel) { + CS = cast(CS->getCapturedStmt()); + // 1.2.2 OpenMP Language Terminology + // Structured block - An executable statement with a single entry at the + // top and a single exit at the bottom. + // The point of exit cannot be a branch out of the structured block. + // longjmp() and throw() must not violate the entry/exit criteria. + CS->getCapturedDecl()->setNothrow(); + } // OpenMP [2.16, Nesting of Regions] // If specified, a teams construct must be contained within a target // construct. That target construct must contain no statements or directives // outside of the teams construct. if (DSAStack->hasInnerTeamsRegion()) { - auto S = AStmt->IgnoreContainers(/*IgnoreCaptured*/ true); + Stmt *S = CS->IgnoreContainers(/*IgnoreCaptured=*/true); bool OMPTeamsFound = true; if (auto *CS = dyn_cast(S)) { auto I = CS->body_begin(); @@ -6407,6 +6472,16 @@ Sema::ActOnOpenMPTargetParallelDirective(ArrayRef Clauses, // The point of exit cannot be a branch out of the structured block. // longjmp() and throw() must not violate the entry/exit criteria. CS->getCapturedDecl()->setNothrow(); + for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target_parallel); + ThisCaptureLevel > 1; --ThisCaptureLevel) { + CS = cast(CS->getCapturedStmt()); + // 1.2.2 OpenMP Language Terminology + // Structured block - An executable statement with a single entry at the + // top and a single exit at the bottom. + // The point of exit cannot be a branch out of the structured block. + // longjmp() and throw() must not violate the entry/exit criteria. + CS->getCapturedDecl()->setNothrow(); + } getCurFunction()->setHasBranchProtectedScope(); @@ -8049,6 +8124,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_target_update: case OMPD_target_enter_data: case OMPD_target_exit_data: + case OMPD_target: CaptureRegion = OMPD_task; break; case OMPD_target_teams: @@ -8057,7 +8133,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: case OMPD_target_data: - case OMPD_target: case OMPD_target_simd: case OMPD_target_parallel: case OMPD_target_parallel_for: @@ -11419,8 +11494,8 @@ OMPClause *Sema::ActOnOpenMPDeviceClause(Expr *Device, SourceLocation StartLoc, HelperValStmt = buildPreInits(Context, Captures); } - return new (Context) - OMPDeviceClause(ValExpr, HelperValStmt, StartLoc, LParenLoc, EndLoc); + return new (Context) OMPDeviceClause(ValExpr, HelperValStmt, CaptureRegion, + StartLoc, LParenLoc, EndLoc); } static bool CheckTypeMappable(SourceLocation SL, SourceRange SR, Sema &SemaRef, diff --git a/clang/test/OpenMP/target_codegen.cpp b/clang/test/OpenMP/target_codegen.cpp index 4d1d266ef208..3671f44e58d7 100644 --- a/clang/test/OpenMP/target_codegen.cpp +++ b/clang/test/OpenMP/target_codegen.cpp @@ -115,7 +115,9 @@ int foo(int n) { static long *plocal; // CHECK: [[ADD:%.+]] = add nsw i32 - // CHECK: [[DEVICE:%.+]] = sext i32 [[ADD]] to i64 + // CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], + // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], + // CHECK: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64 // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null) // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] @@ -128,7 +130,9 @@ int foo(int n) { } // CHECK-DAG: [[ADD:%.+]] = add nsw i32 - // CHECK-DAG: [[DEVICE:%.+]] = sext i32 [[ADD]] to i64 + // CHECK-DAG: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], + // CHECK-DAG: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], + // CHECK-DAG: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64 // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -234,13 +238,13 @@ int foo(int n) { // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]] + // CHECK: [[TRY]] // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 - // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 - // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]] - // CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0)) // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -510,13 +514,13 @@ int bar(int n){ // CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], // CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]] +// CHECK: [[TRY]] // We capture 2 VLA sizes in this target region // CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] // CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 -// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 -// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]] -// CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0)) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 diff --git a/clang/test/OpenMP/target_depend_codegen.cpp b/clang/test/OpenMP/target_depend_codegen.cpp new file mode 100644 index 000000000000..d7eb93d87adf --- /dev/null +++ b/clang/test/OpenMP/target_depend_codegen.cpp @@ -0,0 +1,261 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// SIMD-ONLY1-NOT: {{__kmpc|__tgt}} + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } + +// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat + +// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4] +// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288] +// CHECK-DAG: @{{.*}} = private constant i8 0 + +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]] +// TCHECK-NOT: @{{.+}} = constant [[ENTTY]] + +// Check if offloading descriptor is created. +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]]) +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) + +// Check target registration is registered as a Ctor. +// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }] + + +template +struct TT{ + tx X; + ty Y; +}; + +int global; +extern int global; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + static long *plocal; + + // CHECK: [[ADD:%.+]] = add nsw i32 + // CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], + // CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0 + // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], + // CHECK: store i32 [[DEV]], i32* [[GEP]], + // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) + // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]* + // CHECK: getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0 + // CHECK: getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1 + // CHECK: getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2 + // CHECK: getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 3 + // CHECK: [[DEP_START:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0 + // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8* + // CHECK: call void @__kmpc_omp_wait_deps(%ident_t* @0, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null) + // CHECK: call void @__kmpc_omp_task_begin_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]]) + // CHECK: call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]]) + // CHECK: call void @__kmpc_omp_task_complete_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]]) + #pragma omp target device(global + a) depend(in: global) depend(out: a, b, cn[4]) + { + } + + // CHECK: [[ADD:%.+]] = add nsw i32 + // CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], + + // CHECK: [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0 + // CHECK: br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]] + // CHECK: [[THEN]]: + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0 + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]** + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]** + // CHECK-DAG: store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]] + // CHECK-DAG: store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]] + + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]] + // CHECK-DAG: store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]] + // CHECK-DAG: getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 + // CHECK-DAG: getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 + // CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2 + // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], + // CHECK: store i32 [[DEV]], i32* [[GEP]], + + // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|52}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*)) + // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]* + // CHECK: getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0 + // CHECK: getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1 + // CHECK: getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2 + // CHECK: [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0 + // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8* + // CHECK: call i32 @__kmpc_omp_task_with_deps(%ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) + // CHECK: br label %[[EXIT:.+]] + + // CHECK: [[ELSE]]: + // CHECK-NOT: getelementptr inbounds [2 x i8*], [2 x i8*]* + // CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2 + // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], + // CHECK: store i32 [[DEV]], i32* [[GEP]], + + // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*)) + // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]* + // CHECK: getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0 + // CHECK: getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1 + // CHECK: getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2 + // CHECK: [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0 + // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8* + // CHECK: call i32 @__kmpc_omp_task_with_deps(%ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) + // CHECK: br label %[[EXIT:.+]] + // CHECK: [[EXIT]]: + + #pragma omp target device(global + a) nowait depend(inout: global, a, bn) if(a) + { + static int local1; + *plocal = global; + local1 = global; + } + + // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) + // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]* + // CHECK: getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0 + // CHECK: [[DEP_START:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0 + // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8* + // CHECK: call void @__kmpc_omp_wait_deps(%ident_t* @0, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null) + // CHECK: call void @__kmpc_omp_task_begin_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]]) + // CHECK: call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]]) + // CHECK: call void @__kmpc_omp_task_complete_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]]) + #pragma omp target if(0) firstprivate(global) depend(out:global) + { + global += 1; + } + + return a; +} + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions in foo(). + +// CHECK: define internal void [[HVT0:@.+]]() + +// CHECK: define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, [[TASK_TY0]]* noalias) +// CHECK: store void (i8*, ...)* null, void (i8*, ...)** % +// CHECK: [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0 +// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], +// CHECK: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64 +// CHECK: [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null) +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT0]]() +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK: ret i32 0 + +// CHECK: define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}}) + +// CHECK: define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias) +// CHECK: call void (i8*, ...) % +// CHECK: [[SZT:%.+]] = getelementptr inbounds [2 x i[[SZ]]], [2 x i[[SZ]]]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0 +// CHECK: [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2 +// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], +// CHECK: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64 +// CHECK: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0) + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** % +// CHECK: [[BP1_I32:%.+]] = load i32, i32* % +// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32* +// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]], +// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]], +// CHECK: [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]], +// CHECK: call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]]) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK: ret i32 0 + +// CHECK: define internal{{.*}} i32 [[TASK_ENTRY1__]](i32{{.*}}, [[TASK_TY1__]]* noalias) +// CHECK: call void (i8*, ...) % +// CHECK: [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2 +// CHECK: [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** % +// CHECK: [[BP1_I32:%.+]] = load i32, i32* % +// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32* +// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]], +// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]], +// CHECK: [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]], +// CHECK: call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]]) +// CHECK: ret i32 0 + +// CHECK: define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}}) +// Create stack storage and store argument in there. +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32* +// CHECK-64: load i32, i32* [[AA_CADDR]], align +// CHECK-32: load i32, i32* [[AA_ADDR]], align + +// CHECK: define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, [[TASK_TY2]]* noalias) +// CHECK: call void (i8*, ...) % +// CHECK: [[BP1_I32:%.+]] = load i32, i32* % +// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32* +// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]], +// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]], +// CHECK: [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]], +// CHECK: call void [[HVT2]](i[[SZ]] [[BP1]]) +// CHECK: ret i32 0 + + +#endif diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp index 2689e9a2f8f6..7a76317efe74 100644 --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -197,13 +197,13 @@ int foo(int n) { // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 - // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 - // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] - // CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -531,13 +531,13 @@ int bar(int n){ // CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], // CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] // We capture 2 VLA sizes in this target region // CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] // CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 -// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 -// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] -// CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp index de99e6d18c3f..1bd9abb4d6d9 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -218,13 +218,13 @@ int foo(int n) { // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 - // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 - // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] - // CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -558,13 +558,13 @@ int bar(int n){ // CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], // CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] // We capture 2 VLA sizes in this target region // CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] // CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 -// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 -// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] -// CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp index 600c767f8e48..3da6b81d0952 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -215,13 +215,13 @@ int foo(int n) { // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 - // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 - // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] - // CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -558,13 +558,13 @@ int bar(int n){ // CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], // CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] // We capture 2 VLA sizes in this target region // CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] // CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 -// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 -// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] -// CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 diff --git a/clang/test/OpenMP/target_simd_codegen.cpp b/clang/test/OpenMP/target_simd_codegen.cpp index 868219d984a2..7ed02435e7e6 100644 --- a/clang/test/OpenMP/target_simd_codegen.cpp +++ b/clang/test/OpenMP/target_simd_codegen.cpp @@ -210,13 +210,13 @@ int foo(int n) { // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 - // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 - // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] - // CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0)) // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -481,13 +481,13 @@ int bar(int n){ // CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], // CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] // We capture 2 VLA sizes in this target region // CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] // CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 -// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 -// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] -// CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0)) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index 4acc149053f1..349c0de2a4ef 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -220,13 +220,13 @@ int foo(int n) { // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 - // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 - // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] - // CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -556,13 +556,13 @@ int bar(int n){ // CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], // CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] // We capture 2 VLA sizes in this target region // CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] // CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 -// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 -// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] -// CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 diff --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp index aff7326cbe41..a033bfe2d326 100644 --- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp @@ -221,13 +221,13 @@ int foo(int n) { // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 - // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 - // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] - // CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -567,13 +567,13 @@ int bar(int n){ // CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], // CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] // We capture 2 VLA sizes in this target region // CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] // CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 -// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 -// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] -// CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp index 37a35ffdb87e..3553aa24e6ec 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -220,13 +220,13 @@ int foo(int n) { // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 - // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 - // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] - // CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -556,13 +556,13 @@ int bar(int n){ // CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], // CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] // We capture 2 VLA sizes in this target region // CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] // CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 -// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 -// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] -// CHECK: [[TRY]] // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0