[OPENMP][NVPTX] Allow to use shared memory for the target|teams|distribute variables.

If the total size of the variables declared in target|teams|distribute
regions is less than the maximal size of shared memory available, the
buffer is allocated in shared memory.

llvm-svn: 346507

commit 09c9eea78f (parent b2091c930b)
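As a quick illustration, here is a minimal sketch (a hypothetical test case, not part of this commit) of the kind of code this change affects: a variable declared in a target|teams|distribute region and captured by an inner parallel region must be globalized, and when the globalized record fits under the new SharedMemorySize threshold (128 bytes) the team-static buffer handed to __kmpc_get_team_static_memory now lives in addrspace(3) shared memory instead of the statically allocated global buffer.

// Hypothetical example, not from this commit: 'a' is declared in the
// target|teams|distribute region and captured by the inner parallel
// region, so its storage is globalized. The globalized record is only
// a few bytes, well below the 128-byte SharedMemorySize limit, so the
// buffer is taken from shared memory.
int bar(int n) {
  int a = 0;
#pragma omp target teams distribute
  for (int i = 0; i < n; ++i) {
#pragma omp parallel
    a += i;
  }
  return a;
}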
@@ -176,6 +176,9 @@ enum MachineConfiguration : unsigned {
   /// Global memory alignment for performance.
   GlobalMemoryAlignment = 128,
+
+  /// Maximal size of the shared memory buffer.
+  SharedMemorySize = 128,
 };
 
 enum NamedBarrier : unsigned {
@@ -1143,13 +1146,6 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D,
   IsInTTDRegion = true;
   // Reserve place for the globalized memory.
   GlobalizedRecords.emplace_back();
-  if (!StaticGlobalized) {
-    StaticGlobalized = new llvm::GlobalVariable(
-        CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/true,
-        llvm::GlobalValue::WeakAnyLinkage, nullptr,
-        "_openmp_static_glob_rd$ptr");
-    StaticGlobalized->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-  }
   if (!KernelStaticGlobalized) {
     KernelStaticGlobalized = new llvm::GlobalVariable(
         CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
@@ -1277,13 +1273,6 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
   IsInTTDRegion = true;
   // Reserve place for the globalized memory.
   GlobalizedRecords.emplace_back();
-  if (!StaticGlobalized) {
-    StaticGlobalized = new llvm::GlobalVariable(
-        CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/true,
-        llvm::GlobalValue::WeakAnyLinkage, nullptr,
-        "_openmp_static_glob_rd$ptr");
-    StaticGlobalized->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-  }
   if (!KernelStaticGlobalized) {
     KernelStaticGlobalized = new llvm::GlobalVariable(
         CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
@@ -2138,30 +2127,41 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
     GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord);
+    ++GlobalizedRecords.back().RegionCounter;
     if (GlobalizedRecords.back().Records.size() == 1) {
-      assert(StaticGlobalized &&
-             "Static pointer must be initialized already.");
-      Address Buffer = CGF.EmitLoadOfPointer(
-          Address(StaticGlobalized, CGM.getPointerAlign()),
-          CGM.getContext()
-              .getPointerType(CGM.getContext().VoidPtrTy)
-              .castAs<PointerType>());
       assert(KernelStaticGlobalized &&
              "Kernel static pointer must be initialized already.");
+      auto *UseSharedMemory = new llvm::GlobalVariable(
+          CGM.getModule(), CGM.Int16Ty, /*isConstant=*/true,
+          llvm::GlobalValue::InternalLinkage, nullptr,
+          "_openmp_static_kernel$is_shared");
+      UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+      QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
+          /*DestWidth=*/16, /*Signed=*/0);
+      llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
+          Address(UseSharedMemory,
+                  CGM.getContext().getTypeAlignInChars(Int16Ty)),
+          /*Volatile=*/false, Int16Ty, Loc);
+      auto *StaticGlobalized = new llvm::GlobalVariable(
+          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false,
+          llvm::GlobalValue::WeakAnyLinkage, nullptr);
       auto *RecSize = new llvm::GlobalVariable(
           CGM.getModule(), CGM.SizeTy, /*isConstant=*/true,
          llvm::GlobalValue::InternalLinkage, nullptr,
          "_openmp_static_kernel$size");
       RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
       llvm::Value *Ld = CGF.EmitLoadOfScalar(
-          Address(RecSize, CGM.getPointerAlign()), /*Volatile=*/false,
+          Address(RecSize, CGM.getSizeAlign()), /*Volatile=*/false,
           CGM.getContext().getSizeType(), Loc);
       llvm::Value *ResAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
           KernelStaticGlobalized, CGM.VoidPtrPtrTy);
-      llvm::Value *GlobalRecordSizeArg[] = {
-          Buffer.getPointer(), Ld,
-          llvm::ConstantInt::getNullValue(CGM.Int16Ty), ResAddr};
+      llvm::Value *GlobalRecordSizeArg[] = {StaticGlobalized, Ld,
+                                            IsInSharedMemory, ResAddr};
       CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
                               OMPRTL_NVPTX__kmpc_get_team_static_memory),
                           GlobalRecordSizeArg);
+      GlobalizedRecords.back().Buffer = StaticGlobalized;
+      GlobalizedRecords.back().RecSize = RecSize;
+      GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
+      GlobalizedRecords.back().Loc = Loc;
     }
     assert(KernelStaticGlobalized && "Global address must be set already.");
     Address FrameAddr = CGF.EmitLoadOfPointer(
@@ -2336,10 +2336,16 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF,
+      --GlobalizedRecords.back().RegionCounter;
+      // Emit the restore function only in the target region.
+      if (GlobalizedRecords.back().RegionCounter == 0) {
+        QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
+            /*DestWidth=*/16, /*Signed=*/0);
+        llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
+            Address(GlobalizedRecords.back().UseSharedMemory,
+                    CGM.getContext().getTypeAlignInChars(Int16Ty)),
+            /*Volatile=*/false, Int16Ty, GlobalizedRecords.back().Loc);
       CGF.EmitRuntimeCall(
           createNVPTXRuntimeFunction(
               OMPRTL_NVPTX__kmpc_restore_team_static_memory),
-          llvm::ConstantInt::getNullValue(CGM.Int16Ty));
+          IsInSharedMemory);
+      }
     } else {
       CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
@@ -4507,21 +4513,24 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
 void CGOpenMPRuntimeNVPTX::clear() {
   if (!GlobalizedRecords.empty()) {
     ASTContext &C = CGM.getContext();
+    llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> GlobalRecs;
+    llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> SharedRecs;
     RecordDecl *StaticRD = C.buildImplicitRecord(
         "_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union);
     StaticRD->startDefinition();
+    RecordDecl *SharedStaticRD = C.buildImplicitRecord(
+        "_shared_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union);
+    SharedStaticRD->startDefinition();
     for (const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) {
       if (Records.Records.empty())
         continue;
       unsigned Size = 0;
       unsigned RecAlignment = 0;
       for (const RecordDecl *RD : Records.Records) {
-        QualType RDTy = CGM.getContext().getRecordType(RD);
-        unsigned Alignment =
-            CGM.getContext().getTypeAlignInChars(RDTy).getQuantity();
+        QualType RDTy = C.getRecordType(RD);
+        unsigned Alignment = C.getTypeAlignInChars(RDTy).getQuantity();
         RecAlignment = std::max(RecAlignment, Alignment);
-        unsigned RecSize =
-            CGM.getContext().getTypeSizeInChars(RDTy).getQuantity();
+        unsigned RecSize = C.getTypeSizeInChars(RDTy).getQuantity();
         Size =
             llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment);
       }
@@ -4529,32 +4538,67 @@ void CGOpenMPRuntimeNVPTX::clear() {
       llvm::APInt ArySize(/*numBits=*/64, Size);
       QualType SubTy = C.getConstantArrayType(
          C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
-      auto *Field = FieldDecl::Create(
-          C, StaticRD, SourceLocation(), SourceLocation(), nullptr, SubTy,
-          C.getTrivialTypeSourceInfo(SubTy, SourceLocation()),
-          /*BW=*/nullptr, /*Mutable=*/false,
-          /*InitStyle=*/ICIS_NoInit);
+      const bool UseSharedMemory = Size <= SharedMemorySize;
+      auto *Field =
+          FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD,
+                            SourceLocation(), SourceLocation(), nullptr, SubTy,
+                            C.getTrivialTypeSourceInfo(SubTy, SourceLocation()),
+                            /*BW=*/nullptr, /*Mutable=*/false,
+                            /*InitStyle=*/ICIS_NoInit);
       Field->setAccess(AS_public);
-      StaticRD->addDecl(Field);
+      if (UseSharedMemory) {
+        SharedStaticRD->addDecl(Field);
+        SharedRecs.push_back(&Records);
+      } else {
+        StaticRD->addDecl(Field);
+        GlobalRecs.push_back(&Records);
+      }
       Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.SizeTy, Size));
+      Records.UseSharedMemory->setInitializer(
+          llvm::ConstantInt::get(CGM.Int16Ty, UseSharedMemory ? 1 : 0));
     }
+    SharedStaticRD->completeDefinition();
+    if (!SharedStaticRD->field_empty()) {
+      QualType StaticTy = C.getRecordType(SharedStaticRD);
+      llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy);
+      auto *GV = new llvm::GlobalVariable(
+          CGM.getModule(), LLVMStaticTy,
+          /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage,
+          llvm::Constant::getNullValue(LLVMStaticTy),
+          "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr,
+          llvm::GlobalValue::NotThreadLocal,
+          C.getTargetAddressSpace(LangAS::cuda_shared));
+      auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+          GV, CGM.VoidPtrTy);
+      for (const GlobalPtrSizeRecsTy *Rec : SharedRecs) {
+        Rec->Buffer->replaceAllUsesWith(Replacement);
+        Rec->Buffer->eraseFromParent();
+      }
+    }
     StaticRD->completeDefinition();
-    QualType StaticTy = C.getRecordType(StaticRD);
-    std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM);
-    llvm::APInt Size1(32, SMsBlockPerSM.second);
-    QualType Arr1Ty = C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal,
-                                             /*IndexTypeQuals=*/0);
-    llvm::APInt Size2(32, SMsBlockPerSM.first);
-    QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal,
-                                             /*IndexTypeQuals=*/0);
-    llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty);
-    auto *GV = new llvm::GlobalVariable(
-        CGM.getModule(), LLVMArr2Ty,
-        /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage,
-        llvm::Constant::getNullValue(LLVMArr2Ty), "_openmp_static_glob_rd_$_");
-    StaticGlobalized->setInitializer(
-        llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV,
-                                                             CGM.VoidPtrTy));
+    if (!StaticRD->field_empty()) {
+      QualType StaticTy = C.getRecordType(StaticRD);
+      std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM);
+      llvm::APInt Size1(32, SMsBlockPerSM.second);
+      QualType Arr1Ty =
+          C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal,
+                                 /*IndexTypeQuals=*/0);
+      llvm::APInt Size2(32, SMsBlockPerSM.first);
+      QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal,
+                                               /*IndexTypeQuals=*/0);
+      llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty);
+      auto *GV = new llvm::GlobalVariable(
+          CGM.getModule(), LLVMArr2Ty,
+          /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage,
+          llvm::Constant::getNullValue(LLVMArr2Ty),
+          "_openmp_static_glob_rd_$_");
+      auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+          GV, CGM.VoidPtrTy);
+      for (const GlobalPtrSizeRecsTy *Rec : GlobalRecs) {
+        Rec->Buffer->replaceAllUsesWith(Replacement);
+        Rec->Buffer->eraseFromParent();
+      }
+    }
   }
   CGOpenMPRuntime::clear();
 }
@@ -420,14 +420,14 @@ private:
 /// union. This resulting union (one per CU) is the entry point for the static
 /// memory management runtime functions.
 struct GlobalPtrSizeRecsTy {
+  llvm::GlobalVariable *UseSharedMemory = nullptr;
   llvm::GlobalVariable *RecSize = nullptr;
+  llvm::GlobalVariable *Buffer = nullptr;
+  SourceLocation Loc;
   llvm::SmallVector<const RecordDecl *, 2> Records;
+  unsigned RegionCounter = 0;
 };
 llvm::SmallVector<GlobalPtrSizeRecsTy, 8> GlobalizedRecords;
-/// Global variable used for staticlly allocated global memoryused for
-/// globalization in target/teams/distribute regions.
-llvm::GlobalVariable *StaticGlobalized = nullptr;
 /// Shared pointer for the global memory in the global memory buffer used for
 /// the given kernel.
 llvm::GlobalVariable *KernelStaticGlobalized = nullptr;
@@ -27,10 +27,10 @@ void test_ds(){
   }
 }
 // CK1: [[MEM_TY:%.+]] = type { [8 x i8] }
-// CK1-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CK1-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CK1-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CK1-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CK1-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i64 8
+// CK1-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
 /// ========= In the worker function ========= ///
 // CK1: {{.*}}define internal void @__omp_offloading{{.*}}test_ds{{.*}}_worker()
@@ -44,9 +44,9 @@ void test_ds(){
 // CK1: [[SHAREDARGS2:%.+]] = alloca i8**
 // CK1: call void @__kmpc_kernel_init
 // CK1: call void @__kmpc_data_sharing_init_stack
-// CK1: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CK1: [[SHARED_MEM_FLAG:%.+]] = load i16, i16* [[KERNEL_SHARED]],
 // CK1: [[SIZE:%.+]] = load i64, i64* [[KERNEL_SIZE]],
-// CK1: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i64 [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CK1: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i64 [[SIZE]], i16 [[SHARED_MEM_FLAG]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CK1: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CK1: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i64 0
 // CK1: [[GLOBALSTACK2:%.+]] = bitcast i8* [[GLOBALSTACK]] to %struct._globalized_locals_ty*
@@ -75,7 +75,8 @@ void test_ds(){
 // CK1: call void @llvm.nvvm.barrier0()
 // CK1: call void @llvm.nvvm.barrier0()
 // CK1: call void @__kmpc_end_sharing_variables()
-// CK1: call void @__kmpc_restore_team_static_memory(i16 0)
+// CK1: [[SHARED_MEM_FLAG:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+// CK1: call void @__kmpc_restore_team_static_memory(i16 [[SHARED_MEM_FLAG]])
 // CK1: call void @__kmpc_kernel_deinit(i16 1)
 
 /// ========= In the data sharing wrapper function ========= ///
@@ -22,8 +22,7 @@ int main(int argc, char **argv) {
 }
 
 // CHECK: [[MEM_TY:%.+]] = type { [84 x i8] }
-// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 84
 // CHECK-DAG: @__omp_offloading_{{.*}}_main_l17_exec_mode = weak constant i8 1
@@ -31,8 +30,7 @@ int main(int argc, char **argv) {
 // CHECK-LABEL: define internal void @__omp_offloading_{{.*}}_main_l17_worker(
 
 // CHECK: define weak void @__omp_offloading_{{.*}}_main_l17([10 x i32]* dereferenceable(40) %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i{{64|32}} %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}})
-// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
-// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} 84, i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 84, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CHECK: [[PTR:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CHECK: [[STACK:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty*
 // CHECK: [[ARGC:%.+]] = load i32, i32* %{{.+}}, align
@@ -48,7 +46,7 @@ int main(int argc, char **argv) {
 
 // CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* @
 
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 0)
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 1)
 
 // CHECK: define internal void [[PARALLEL]](
 // CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
@@ -72,10 +72,10 @@ int bar(int n){
 }
 
 // CHECK: [[MEM_TY:%.+]] = type { [4 x i8] }
-// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
+// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
 // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l17}}_worker()
@@ -324,9 +324,9 @@ int bar(int n){
 // CHECK-32: [[A_ADDR:%.+]] = alloca i32,
 // CHECK-64: [[A_ADDR:%.+]] = alloca i64,
 // CHECK-64: [[CONV:%.+]] = bitcast i64* [[A_ADDR]] to i32*
-// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CHECK: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
 // CHECK: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE]],
-// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CHECK: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CHECK: [[STACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0
 // CHECK: [[BC:%.+]] = bitcast i8* [[STACK]] to %struct._globalized_locals_ty*

@@ -334,7 +334,8 @@ int bar(int n){
 // CHECK-64: [[A:%.+]] = load i32, i32* [[CONV]],
 // CHECK: [[GLOBAL_A_ADDR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: store i32 [[A]], i32* [[GLOBAL_A_ADDR]],
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 0)
+// CHECK: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 [[IS_SHARED]])
 
 // CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.*}})
 // CHECK: [[CC:%.+]] = alloca i32,
|
@ -31,10 +31,10 @@ int bar(int n){
|
|||
}
|
||||
|
||||
// CHECK: [[MEM_TY:%.+]] = type { [4 x i8] }
|
||||
// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
|
||||
// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
|
||||
// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
|
||||
// CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
|
||||
// CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
|
||||
// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
|
||||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l12}}_worker()
|
||||
// CHECK: call void @llvm.nvvm.barrier0()
|
||||
|
@@ -45,9 +45,9 @@ int bar(int n){
 // CHECK: call void @__omp_offloading_{{.*}}l12_worker()
 // CHECK: call void @__kmpc_kernel_init(
 // CHECK: call void @__kmpc_data_sharing_init_stack()
-// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CHECK: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
 // CHECK: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE]],
-// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i64 %7, i16 %6, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CHECK: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CHECK: [[STACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0
 // CHECK: call void @__kmpc_kernel_prepare_parallel(
@@ -55,7 +55,8 @@ int bar(int n){
 // CHECK: call void @llvm.nvvm.barrier0()
 // CHECK: call void @llvm.nvvm.barrier0()
 // CHECK: call void @__kmpc_end_sharing_variables()
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 0)
+// CHECK: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 [[IS_SHARED]])
 // CHECK: call void @__kmpc_kernel_deinit(i16 1)
 
 // CHECK: define internal void @__omp_outlined__(
@@ -68,23 +68,22 @@ int bar(int n){
 }
 
 // CHECK-DAG: [[MEM_TY:%.+]] = type { [4 x i8] }
-// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
+// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l32(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1)
-// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
-// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} 4, i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
 // CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
 // CHECK: {{call|invoke}} void [[OUTL1:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 0)
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 1)
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
 // CHECK: ret void
@@ -63,23 +63,22 @@ int bar(int n){
 }
 
 // CHECK-DAG: [[MEM_TY:%.+]] = type { [4 x i8] }
-// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
+// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1)
-// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
-// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} 4, i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
 // CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
 // CHECK: {{call|invoke}} void [[OUTL1:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 0)
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 1)
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
 // CHECK: ret void
@@ -28,11 +28,12 @@ int main (int argc, char **argv) {
 }
 
 // CK1: [[MEM_TY:%.+]] = type { [{{4|8}} x i8] }
-// CK1-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CK1-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CK1-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CK1-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CK1-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // CK1-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} {{8|4}}
+// CK1-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
+// CK1-DAG: [[KERNEL_SHARED2:@.+]] = internal unnamed_addr constant i16 1
 
 // only nvptx side: do not outline teams region and do not call fork_teams
 // CK1: define {{.*}}void @{{[^,]+}}(i{{[0-9]+}} [[ARGC:%.+]])
@@ -43,9 +44,9 @@ int main (int argc, char **argv) {
 // CK1: store {{.+}} 0, {{.+}},
 // CK1: store i{{[0-9]+}} [[ARGC]], i{{[0-9]+}}* [[ARGCADDR]],
 // CK1-64: [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ARGCADDR]] to i{{[0-9]+}}*
-// CK1: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CK1: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED1]],
 // CK1: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE1]],
-// CK1: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CK1: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CK1: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CK1: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0
 // CK1-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
@@ -64,9 +65,9 @@ int main (int argc, char **argv) {
 // CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{.+}}***,
 // CK1: [[ARGCADDR:%.+]] = alloca i{{.+}}**,
 // CK1: store i{{.+}}** [[ARGC]], i{{.+}}*** [[ARGCADDR]]
-// CK1: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CK1: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED2]],
 // CK1: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE2]],
-// CK1: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CK1: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CK1: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CK1: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0
 // CK1: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
@@ -114,11 +115,12 @@ int main (int argc, char **argv) {
 }
 
 // CK2: [[MEM_TY:%.+]] = type { [{{4|8}} x i8] }
-// CK2-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CK2-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CK2-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CK2-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CK2-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // CK2-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} {{8|4}}
+// CK2-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
+// CK2-DAG: [[KERNEL_SHARED2:@.+]] = internal unnamed_addr constant i16 1
 
 // CK2: define {{.*}}void @{{[^,]+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[B_IN:%.+]], i{{[0-9]+}} [[ARGC_IN:.+]])
 // CK2: {{.}} = alloca i{{[0-9]+}}*,
@@ -133,9 +135,9 @@ int main (int argc, char **argv) {
 // CK2-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32*
 // CK2-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32*
 // CK2-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32*
-// CK2: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CK2: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED1]],
 // CK2: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE1]],
-// CK2: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CK2: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CK2: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CK2: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0
 // CK2-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
@@ -158,9 +160,9 @@ int main (int argc, char **argv) {
 // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
 // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
 // CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]],
-// CK2: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CK2: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED2]],
 // CK2: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE2]],
-// CK2: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CK2: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CK2: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CK2: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0
 // CK2: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]