forked from OSchip/llvm-project
[OPENMP] ThreadId in serialized parallel regions is 0.
The first argument for the parallel outlined functions, called as serialized parallel regions, should be a pointer to the global thread id that always is 0. llvm-svn: 337957
This commit is contained in:
parent
b6613ac665
commit
8521ff6ec4
|
@ -2839,12 +2839,12 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
|
||||||
RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
|
RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
|
||||||
|
|
||||||
// OutlinedFn(>id, &zero, CapturedStruct);
|
// OutlinedFn(>id, &zero, CapturedStruct);
|
||||||
Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
|
|
||||||
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
|
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
|
||||||
/*Name*/ ".zero.addr");
|
/*Name*/ ".zero.addr");
|
||||||
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
||||||
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
|
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
|
||||||
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
|
// ThreadId for serialized parallels is 0.
|
||||||
|
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
||||||
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
||||||
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
|
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
|
||||||
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
|
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
|
||||||
|
|
|
@ -1784,8 +1784,9 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
|
||||||
/*DestWidth=*/32, /*Signed=*/1),
|
/*DestWidth=*/32, /*Signed=*/1),
|
||||||
".zero.addr");
|
".zero.addr");
|
||||||
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
||||||
Address ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
|
// ThreadId for serialized parallels is 0.
|
||||||
auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, ThreadIDAddr](
|
Address ThreadIDAddr = ZeroAddr;
|
||||||
|
auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr](
|
||||||
CodeGenFunction &CGF, PrePostActionTy &Action) {
|
CodeGenFunction &CGF, PrePostActionTy &Action) {
|
||||||
Action.Enter(CGF);
|
Action.Enter(CGF);
|
||||||
|
|
||||||
|
@ -1883,8 +1884,9 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
|
||||||
Work.emplace_back(WFn);
|
Work.emplace_back(WFn);
|
||||||
};
|
};
|
||||||
|
|
||||||
auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen, &CodeGen](
|
auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen, &CodeGen,
|
||||||
CodeGenFunction &CGF, PrePostActionTy &Action) {
|
&ThreadIDAddr](CodeGenFunction &CGF,
|
||||||
|
PrePostActionTy &Action) {
|
||||||
RegionCodeGenTy RCG(CodeGen);
|
RegionCodeGenTy RCG(CodeGen);
|
||||||
if (IsInParallelRegion) {
|
if (IsInParallelRegion) {
|
||||||
SeqGen(CGF, Action);
|
SeqGen(CGF, Action);
|
||||||
|
@ -1936,6 +1938,8 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
|
||||||
// There is no need to emit line number for unconditional branch.
|
// There is no need to emit line number for unconditional branch.
|
||||||
(void)ApplyDebugLocation::CreateEmpty(CGF);
|
(void)ApplyDebugLocation::CreateEmpty(CGF);
|
||||||
CGF.EmitBlock(ElseBlock);
|
CGF.EmitBlock(ElseBlock);
|
||||||
|
// In the worker need to use the real thread id.
|
||||||
|
ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
|
||||||
RCG(CGF);
|
RCG(CGF);
|
||||||
// There is no need to emit line number for unconditional branch.
|
// There is no need to emit line number for unconditional branch.
|
||||||
(void)ApplyDebugLocation::CreateEmpty(CGF);
|
(void)ApplyDebugLocation::CreateEmpty(CGF);
|
||||||
|
@ -1965,10 +1969,11 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
|
||||||
/*DestWidth=*/32, /*Signed=*/1),
|
/*DestWidth=*/32, /*Signed=*/1),
|
||||||
".zero.addr");
|
".zero.addr");
|
||||||
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
||||||
Address ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
|
// ThreadId for serialized parallels is 0.
|
||||||
|
Address ThreadIDAddr = ZeroAddr;
|
||||||
auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
|
auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
|
||||||
ThreadIDAddr](CodeGenFunction &CGF,
|
&ThreadIDAddr](CodeGenFunction &CGF,
|
||||||
PrePostActionTy &Action) {
|
PrePostActionTy &Action) {
|
||||||
Action.Enter(CGF);
|
Action.Enter(CGF);
|
||||||
|
|
||||||
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
|
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
|
||||||
|
@ -1995,6 +2000,8 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
|
||||||
};
|
};
|
||||||
|
|
||||||
if (IsInTargetMasterThreadRegion) {
|
if (IsInTargetMasterThreadRegion) {
|
||||||
|
// In the worker need to use the real thread id.
|
||||||
|
ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
|
||||||
RegionCodeGenTy RCG(CodeGen);
|
RegionCodeGenTy RCG(CodeGen);
|
||||||
RCG(CGF);
|
RCG(CGF);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -562,7 +562,6 @@ int baz(int f, double &a) {
|
||||||
// CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty*
|
// CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty*
|
||||||
// CHECK: [[F_PTR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[REC_ADDR]], i32 0, i32 0
|
// CHECK: [[F_PTR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[REC_ADDR]], i32 0, i32 0
|
||||||
// CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
|
// CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
|
||||||
// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR]],
|
|
||||||
|
|
||||||
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
|
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
|
||||||
// CHECK: icmp ne i8 [[RES]], 0
|
// CHECK: icmp ne i8 [[RES]], 0
|
||||||
|
@ -573,7 +572,7 @@ int baz(int f, double &a) {
|
||||||
// CHECK: br i1
|
// CHECK: br i1
|
||||||
|
|
||||||
// CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
|
// CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
|
||||||
// CHECK: call void [[OUTLINED:@.+]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
|
// CHECK: call void [[OUTLINED:@.+]](i32* [[ZERO_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
|
||||||
// CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
|
// CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
|
||||||
// CHECK: br label
|
// CHECK: br label
|
||||||
|
|
||||||
|
@ -591,6 +590,7 @@ int baz(int f, double &a) {
|
||||||
// CHECK: call void @__kmpc_end_sharing_variables()
|
// CHECK: call void @__kmpc_end_sharing_variables()
|
||||||
// CHECK: br label
|
// CHECK: br label
|
||||||
|
|
||||||
|
// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR]],
|
||||||
// CHECK: call void [[OUTLINED]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
|
// CHECK: call void [[OUTLINED]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
|
||||||
// CHECK: br label
|
// CHECK: br label
|
||||||
|
|
||||||
|
|
|
@ -29,12 +29,12 @@ void gtid_test() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// CHECK: define internal {{.*}}void [[GTID_TEST_REGION1]](i{{.+}}* noalias [[GTID_PARAM:%.+]], i32* noalias
|
// CHECK: define internal {{.*}}void [[GTID_TEST_REGION1]](i{{.+}}* noalias [[GTID_PARAM:%.+]], i32* noalias
|
||||||
|
// CHECK: store i32 0, i32* [[ZERO_ADDR:%.+]],
|
||||||
// CHECK: store i{{[0-9]+}}* [[GTID_PARAM]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]],
|
// CHECK: store i{{[0-9]+}}* [[GTID_PARAM]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]],
|
||||||
// CHECK: [[GTID_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
|
// CHECK: [[GTID_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
|
||||||
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_ADDR]]
|
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_ADDR]]
|
||||||
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
|
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
|
||||||
// CHECK: [[GTID_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
|
// CHECK: call void [[GTID_TEST_REGION2:@.+]](i{{[0-9]+}}* [[ZERO_ADDR]]
|
||||||
// CHECK: call void [[GTID_TEST_REGION2:@.+]](i{{[0-9]+}}* [[GTID_ADDR]]
|
|
||||||
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
|
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
|
||||||
// CHECK: ret void
|
// CHECK: ret void
|
||||||
|
|
||||||
|
@ -56,12 +56,13 @@ int tmain(T Arg) {
|
||||||
// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
|
// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
|
||||||
int main() {
|
int main() {
|
||||||
// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
|
// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
|
||||||
|
// CHECK: store i32 0, i32* [[ZERO_ADDR2:%.+]],
|
||||||
|
// CHECK: store i32 0, i32* [[ZERO_ADDR1:%.+]],
|
||||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN4:@.+]] to void
|
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN4:@.+]] to void
|
||||||
#pragma omp parallel if (true)
|
#pragma omp parallel if (true)
|
||||||
fn4();
|
fn4();
|
||||||
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
||||||
// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]],
|
// CHECK: call void [[CAP_FN5:@.+]](i32* [[ZERO_ADDR1]], i32* [[ZERO_ADDR1]])
|
||||||
// CHECK: call void [[CAP_FN5:@.+]](i32* [[GTID_ADDR]],
|
|
||||||
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
||||||
#pragma omp parallel if (false)
|
#pragma omp parallel if (false)
|
||||||
fn5();
|
fn5();
|
||||||
|
@ -72,8 +73,7 @@ int main() {
|
||||||
// CHECK: br label %[[OMP_END:.+]]
|
// CHECK: br label %[[OMP_END:.+]]
|
||||||
// CHECK: [[OMP_ELSE]]
|
// CHECK: [[OMP_ELSE]]
|
||||||
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
||||||
// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]],
|
// CHECK: call void [[CAP_FN6]](i32* [[ZERO_ADDR2]], i32* [[ZERO_ADDR2]])
|
||||||
// CHECK: call void [[CAP_FN6]](i32* [[GTID_ADDR]],
|
|
||||||
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
||||||
// CHECK: br label %[[OMP_END]]
|
// CHECK: br label %[[OMP_END]]
|
||||||
// CHECK: [[OMP_END]]
|
// CHECK: [[OMP_END]]
|
||||||
|
@ -97,10 +97,11 @@ int main() {
|
||||||
|
|
||||||
// CHECK-LABEL: define {{.+}} @{{.+}}tmain
|
// CHECK-LABEL: define {{.+}} @{{.+}}tmain
|
||||||
// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
|
// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
|
||||||
|
// CHECK: store i32 0, i32* [[ZERO_ADDR2:%.+]],
|
||||||
|
// CHECK: store i32 0, i32* [[ZERO_ADDR1:%.+]],
|
||||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN1:@.+]] to void
|
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN1:@.+]] to void
|
||||||
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
||||||
// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]],
|
// CHECK: call void [[CAP_FN2:@.+]](i32* [[ZERO_ADDR1]], i32* [[ZERO_ADDR1]])
|
||||||
// CHECK: call void [[CAP_FN2:@.+]](i32* [[GTID_ADDR]],
|
|
||||||
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
||||||
// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
|
// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
|
||||||
// CHECK: [[OMP_THEN]]
|
// CHECK: [[OMP_THEN]]
|
||||||
|
@ -108,8 +109,7 @@ int main() {
|
||||||
// CHECK: br label %[[OMP_END:.+]]
|
// CHECK: br label %[[OMP_END:.+]]
|
||||||
// CHECK: [[OMP_ELSE]]
|
// CHECK: [[OMP_ELSE]]
|
||||||
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
||||||
// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]],
|
// CHECK: call void [[CAP_FN3]](i32* [[ZERO_ADDR2]], i32* [[ZERO_ADDR2]])
|
||||||
// CHECK: call void [[CAP_FN3]](i32* [[GTID_ADDR]],
|
|
||||||
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
||||||
// CHECK: br label %[[OMP_END]]
|
// CHECK: br label %[[OMP_END]]
|
||||||
// CHECK: [[OMP_END]]
|
// CHECK: [[OMP_END]]
|
||||||
|
|
Loading…
Reference in New Issue