diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index ab29e32929ce..5d74d91065f5 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -125,8 +125,13 @@ Address CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty, } void CodeGenFunction::InitTempAlloca(Address Var, llvm::Value *Init) { - assert(isa(Var.getPointer())); - auto *Store = new llvm::StoreInst(Init, Var.getPointer(), /*volatile*/ false, + auto *Alloca = Var.getPointer(); + assert(isa(Alloca) || + (isa(Alloca) && + isa( + cast(Alloca)->getPointerOperand()))); + + auto *Store = new llvm::StoreInst(Init, Alloca, /*volatile*/ false, Var.getAlignment().getAsAlign()); llvm::BasicBlock *Block = AllocaInsertPt->getParent(); Block->getInstList().insertAfter(AllocaInsertPt->getIterator(), Store); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp new file mode 100644 index 000000000000..ccffdf43549f --- /dev/null +++ b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp @@ -0,0 +1,61 @@ +//===-- CGOpenMPRuntimeAMDGCN.cpp - Interface to OpenMP AMDGCN Runtimes --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides a class for OpenMP runtime code generation specialized to +// AMDGCN targets from generalized CGOpenMPRuntimeGPU class. +// +//===----------------------------------------------------------------------===// + +#include "CGOpenMPRuntimeAMDGCN.h" +#include "CGOpenMPRuntimeGPU.h" +#include "CodeGenFunction.h" +#include "clang/AST/Attr.h" +#include "clang/AST/DeclOpenMP.h" +#include "clang/AST/StmtOpenMP.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Basic/Cuda.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" + +using namespace clang; +using namespace CodeGen; +using namespace llvm::omp; + +CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM) + : CGOpenMPRuntimeGPU(CGM) { + if (!CGM.getLangOpts().OpenMPIsDevice) + llvm_unreachable("OpenMP AMDGCN can only handle device code."); +} + +llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + // return constant compile-time target-specific warp size + unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size); + return Bld.getInt32(WarpSize); +} + +llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Function *F = + CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x); + return Bld.CreateCall(F, llvm::None, "nvptx_tid"); +} + +llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Module *M = &CGF.CGM.getModule(); + const char *LocSize = "__ockl_get_local_size"; + llvm::Function *F = M->getFunction(LocSize); + if (!F) { + F = llvm::Function::Create( + llvm::FunctionType::get(CGF.Int64Ty, {CGF.Int32Ty}, false), + llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); + } + return Bld.CreateTrunc( + Bld.CreateCall(F, {Bld.getInt32(0)}, "nvptx_num_threads"), CGF.Int32Ty); +} diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h new file mode 100644 index 000000000000..c1421261bfc1 --- /dev/null +++ b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h @@ -0,0 +1,43 @@ +//===--- CGOpenMPRuntimeAMDGCN.h - Interface to OpenMP AMDGCN Runtimes ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides a class for OpenMP runtime code generation specialized to +// AMDGCN targets from generalized CGOpenMPRuntimeGPU class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H +#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H + +#include "CGOpenMPRuntime.h" +#include "CGOpenMPRuntimeGPU.h" +#include "CodeGenFunction.h" +#include "clang/AST/StmtOpenMP.h" + +namespace clang { +namespace CodeGen { + +class CGOpenMPRuntimeAMDGCN final : public CGOpenMPRuntimeGPU { + +public: + explicit CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM); + + /// Get the GPU warp size. + llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override; + + /// Get the id of the current thread on the GPU. + llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override; + + /// Get the maximum number of threads in a block of the GPU. + llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override; +}; + +} // namespace CodeGen +} // namespace clang + +#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 1cd89c540f47..452eb15eb8d1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This provides a generalized class for OpenMP runtime code generation -// specialized by GPU target NVPTX. +// specialized by GPU targets NVPTX and AMDGCN. // //===----------------------------------------------------------------------===// @@ -621,14 +621,6 @@ public: }; } // anonymous namespace -/// Get the id of the current thread on the GPU. -static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) { - return CGF.EmitRuntimeCall( - llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), - "nvptx_tid"); -} - /// Get the id of the warp in the block. /// We assume that the warp size is 32, which is always the case /// on the NVPTX device, to generate more efficient code. @@ -636,7 +628,8 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; unsigned LaneIDBits = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size_Log2); - return Bld.CreateAShr(getNVPTXThreadID(CGF), LaneIDBits, "nvptx_warp_id"); + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); + return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id"); } /// Get the id of the current lane in the Warp. @@ -646,18 +639,11 @@ static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; unsigned LaneIDMask = CGF.getContext().getTargetInfo().getGridValue( llvm::omp::GV_Warp_Size_Log2_Mask); - return Bld.CreateAnd(getNVPTXThreadID(CGF), Bld.getInt32(LaneIDMask), + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); + return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask), "nvptx_lane_id"); } -/// Get the maximum number of threads in a block of the GPU. -static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { - return CGF.EmitRuntimeCall( - llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), - "nvptx_num_threads"); -} - /// Get the value of the thread_limit clause in the teams directive. /// For the 'generic' execution mode, the runtime encodes thread_limit in /// the launch parameters, always starting thread_limit+warpSize threads per @@ -668,9 +654,9 @@ static llvm::Value *getThreadLimit(CodeGenFunction &CGF, CGBuilderTy &Bld = CGF.Builder; auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); return IsInSPMDExecutionMode - ? getNVPTXNumThreads(CGF) - : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), RT.getGPUWarpSize(CGF), - "thread_limit"); + ? RT.getGPUNumThreads(CGF) + : Bld.CreateNUWSub(RT.getGPUNumThreads(CGF), + RT.getGPUWarpSize(CGF), "thread_limit"); } /// Get the thread id of the OMP master thread. @@ -682,8 +668,8 @@ static llvm::Value *getThreadLimit(CodeGenFunction &CGF, /// If NumThreads is 1024, master id is 992. static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; - llvm::Value *NumThreads = getNVPTXNumThreads(CGF); auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); + llvm::Value *NumThreads = RT.getGPUNumThreads(CGF); // We assume that the warp size is a power of 2. llvm::Value *Mask = Bld.CreateNUWSub(RT.getGPUWarpSize(CGF), Bld.getInt32(1)); @@ -1235,8 +1221,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF, llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); EST.ExitBB = CGF.createBasicBlock(".exit"); + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); llvm::Value *IsWorker = - Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF)); + Bld.CreateICmpULT(RT.getGPUThreadID(CGF), getThreadLimit(CGF)); Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB); CGF.EmitBlock(WorkerBB); @@ -1245,7 +1232,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF, CGF.EmitBlock(MasterCheckBB); llvm::Value *IsMaster = - Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF)); + Bld.CreateICmpEQ(RT.getGPUThreadID(CGF), getMasterThreadID(CGF)); Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB); CGF.EmitBlock(MasterBB); @@ -2780,14 +2767,16 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion( llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body"); llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit"); + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); + // Get the mask of active threads in the warp. llvm::Value *Mask = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask)); // Fetch team-local id of the thread. - llvm::Value *ThreadID = getNVPTXThreadID(CGF); + llvm::Value *ThreadID = RT.getGPUThreadID(CGF); // Get the width of the team. - llvm::Value *TeamWidth = getNVPTXNumThreads(CGF); + llvm::Value *TeamWidth = RT.getGPUNumThreads(CGF); // Initialize the counter variable for the loop. QualType Int32Ty = @@ -3250,8 +3239,9 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, CGM.addCompilerUsedGlobal(TransferMedium); } + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); // Get the CUDA thread id of the current OpenMP thread on the GPU. - llvm::Value *ThreadID = getNVPTXThreadID(CGF); + llvm::Value *ThreadID = RT.getGPUThreadID(CGF); // nvptx_lane_id = nvptx_id % warpsize llvm::Value *LaneID = getNVPTXLaneID(CGF); // nvptx_warp_id = nvptx_id / warpsize @@ -4844,9 +4834,11 @@ void CGOpenMPRuntimeGPU::getDefaultDistScheduleAndChunk( CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const { + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD) { ScheduleKind = OMPC_DIST_SCHEDULE_static; - Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF), + Chunk = CGF.EmitScalarConversion( + RT.getGPUNumThreads(CGF), CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), S.getIterationVariable()->getType(), S.getBeginLoc()); return; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index 316333072c5b..7267511ca672 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This provides a generalized class for OpenMP runtime code generation -// specialized by GPU target NVPTX. +// specialized by GPU targets NVPTX and AMDGCN. // //===----------------------------------------------------------------------===// @@ -199,9 +199,18 @@ public: void clear() override; /// Declare generalized virtual functions which need to be defined - /// by all specializations of OpenMPGPURuntime Targets. + /// by all specializations of OpenMPGPURuntime Targets like AMDGCN + /// and NVPTX. + + /// Get the GPU warp size. virtual llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) = 0; + /// Get the id of the current thread on the GPU. + virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0; + + /// Get the maximum number of threads in a block of the GPU. + virtual llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) = 0; + /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. virtual void emitProcBindClause(CodeGenFunction &CGF, diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 5fefc95ee413..1688d07b90b6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -32,10 +32,25 @@ CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) llvm_unreachable("OpenMP NVPTX can only handle device code."); } -/// Get the GPU warp size. llvm::Value *CGOpenMPRuntimeNVPTX::getGPUWarpSize(CodeGenFunction &CGF) { return CGF.EmitRuntimeCall( llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), "nvptx_warp_size"); } + +llvm::Value *CGOpenMPRuntimeNVPTX::getGPUThreadID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Function *F; + F = llvm::Intrinsic::getDeclaration( + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x); + return Bld.CreateCall(F, llvm::None, "nvptx_tid"); +} + +llvm::Value *CGOpenMPRuntimeNVPTX::getGPUNumThreads(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Function *F; + F = llvm::Intrinsic::getDeclaration( + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x); + return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); +} diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 6dab79e6e20a..5f1602959266 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -22,11 +22,19 @@ namespace clang { namespace CodeGen { -class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntimeGPU { +class CGOpenMPRuntimeNVPTX final : public CGOpenMPRuntimeGPU { public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); + + /// Get the GPU warp size. llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override; + + /// Get the id of the current thread on the GPU. + llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override; + + /// Get the maximum number of threads in a block of the GPU. + llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override; }; } // CodeGen namespace. diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index 88647a2007fb..f47ecd9bf846 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -58,6 +58,7 @@ add_clang_library(clangCodeGen CGObjCRuntime.cpp CGOpenCLRuntime.cpp CGOpenMPRuntime.cpp + CGOpenMPRuntimeAMDGCN.cpp CGOpenMPRuntimeGPU.cpp CGOpenMPRuntimeNVPTX.cpp CGRecordLayoutBuilder.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 48a1dddfb331..f3712ea1f541 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -19,6 +19,7 @@ #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" +#include "CGOpenMPRuntimeAMDGCN.h" #include "CGOpenMPRuntimeNVPTX.h" #include "CodeGenFunction.h" #include "CodeGenPGO.h" @@ -215,6 +216,11 @@ void CodeGenModule::createOpenMPRuntime() { "OpenMP NVPTX is only prepared to deal with device code."); OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this)); break; + case llvm::Triple::amdgcn: + assert(getLangOpts().OpenMPIsDevice && + "OpenMP AMDGCN is only prepared to deal with device code."); + OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this)); + break; default: if (LangOpts.OpenMPSimd) OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this)); diff --git a/clang/test/OpenMP/amdgcn_target_codegen.cpp b/clang/test/OpenMP/amdgcn_target_codegen.cpp new file mode 100644 index 000000000000..0b6f2d40ffe8 --- /dev/null +++ b/clang/test/OpenMP/amdgcn_target_codegen.cpp @@ -0,0 +1,43 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +#define N 1000 + +int test_amdgcn_target_tid_threads() { +// CHECK-LABEL: define weak void @{{.*}}test_amdgcn_target_tid_threads + + int arr[N]; + +// CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0) +// CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32 +// CHECK-NEXT: sub nuw i32 [[VAR]], 64 +// CHECK: call i32 @llvm.amdgcn.workitem.id.x() +#pragma omp target + for (int i = 0; i < N; i++) { + arr[i] = 1; + } + + return arr[0]; +} + +int test_amdgcn_target_tid_threads_simd() { +// CHECK-LABEL: define weak void @{{.*}}test_amdgcn_target_tid_threads_simd + + int arr[N]; + +// CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0) +// CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32 +// CHECK-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[VAR]], i16 0, i16 0) +#pragma omp target simd + for (int i = 0; i < N; i++) { + arr[i] = 1; + } + return arr[0]; +} + +#endif diff --git a/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp b/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp new file mode 100644 index 000000000000..4ed953a9ebf7 --- /dev/null +++ b/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp @@ -0,0 +1,24 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +#define N 100 + +int test_amdgcn_target_temp_alloca() { + // CHECK-LABEL: test_amdgcn_target_temp_alloca + + int arr[N]; + + // CHECK: [[VAR_ADDR:%.+]] = alloca [100 x i32]*, align 8, addrspace(5) + // CHECK-NEXT: [[VAR_ADDR_CAST:%.+]] = addrspacecast [100 x i32]* addrspace(5)* [[VAR_ADDR]] to [100 x i32]** + // CHECK: store [100 x i32]* [[VAR:%.+]], [100 x i32]** [[VAR_ADDR_CAST]], align 8 + +#pragma omp target + for (int i = 0; i < N; i++) { + arr[i] = 1; + } + + return arr[0]; +}