forked from OSchip/llvm-project
[HIP] Generate offloading entries for HIP with the new driver.
This patch adds the small change required to output offloading entried for HIP instead of CUDA. These should be placed in different sections so because they need to be distinct to the offloading toolchain, otherwise we'd have HIP trying to register CUDA kernels or vice-versa. This patch will precede support for HIP in the linker wrapper. Reviewed By: yaxunl, tra Differential Revision: https://reviews.llvm.org/D128850
This commit is contained in:
parent
ec2b040e18
commit
e88d53d25f
|
@ -1116,7 +1116,8 @@ void CGNVCUDARuntime::createOffloadingEntries() {
|
|||
llvm::OpenMPIRBuilder OMPBuilder(CGM.getModule());
|
||||
OMPBuilder.initialize();
|
||||
|
||||
StringRef Section = "cuda_offloading_entries";
|
||||
StringRef Section = CGM.getLangOpts().HIP ? "hip_offloading_entries"
|
||||
: "cuda_offloading_entries";
|
||||
for (KernelInfo &I : EmittedKernels)
|
||||
OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel],
|
||||
getDeviceSideName(cast<NamedDecl>(I.D)), 0,
|
||||
|
|
|
@ -1,33 +1,57 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".omp_offloading.entry.*"
|
||||
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu \
|
||||
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
|
||||
// RUN: --check-prefix=HOST %s
|
||||
// RUN: --check-prefix=CUDA %s
|
||||
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu \
|
||||
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
|
||||
// RUN: --check-prefix=HIP %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
//.
|
||||
// HOST: @x = internal global i32 undef, align 4
|
||||
// HOST: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
|
||||
// HOST: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
|
||||
// HOST: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [8 x i8] c"_Z3barv\00"
|
||||
// HOST: @.omp_offloading.entry._Z3barv = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__barv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
|
||||
// HOST: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [2 x i8] c"x\00"
|
||||
// HOST: @.omp_offloading.entry.x = weak constant %struct.__tgt_offload_entry { ptr @x, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
|
||||
// CUDA: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
|
||||
// CUDA: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
|
||||
// CUDA: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [8 x i8] c"_Z3barv\00"
|
||||
// CUDA: @.omp_offloading.entry._Z3barv = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__barv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
|
||||
// CUDA: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [2 x i8] c"x\00"
|
||||
// CUDA: @.omp_offloading.entry.x = weak constant %struct.__tgt_offload_entry { ptr @x, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
|
||||
//.
|
||||
// HOST-LABEL: @_Z18__device_stub__foov(
|
||||
// HOST-NEXT: entry:
|
||||
// HOST-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
|
||||
// HOST-NEXT: br label [[SETUP_END:%.*]]
|
||||
// HOST: setup.end:
|
||||
// HOST-NEXT: ret void
|
||||
// HIP: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
|
||||
// HIP: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
|
||||
// HIP: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [8 x i8] c"_Z3barv\00"
|
||||
// HIP: @.omp_offloading.entry._Z3barv = weak constant %struct.__tgt_offload_entry { ptr @_Z3barv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
|
||||
// HIP: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [2 x i8] c"x\00"
|
||||
// HIP: @.omp_offloading.entry.x = weak constant %struct.__tgt_offload_entry { ptr @x, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1
|
||||
//.
|
||||
// CUDA-LABEL: @_Z18__device_stub__foov(
|
||||
// CUDA-NEXT: entry:
|
||||
// CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
|
||||
// CUDA-NEXT: br label [[SETUP_END:%.*]]
|
||||
// CUDA: setup.end:
|
||||
// CUDA-NEXT: ret void
|
||||
//
|
||||
// HIP-LABEL: @_Z18__device_stub__foov(
|
||||
// HIP-NEXT: entry:
|
||||
// HIP-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov)
|
||||
// HIP-NEXT: br label [[SETUP_END:%.*]]
|
||||
// HIP: setup.end:
|
||||
// HIP-NEXT: ret void
|
||||
//
|
||||
__global__ void foo() {}
|
||||
// HOST-LABEL: @_Z18__device_stub__barv(
|
||||
// HOST-NEXT: entry:
|
||||
// HOST-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__barv)
|
||||
// HOST-NEXT: br label [[SETUP_END:%.*]]
|
||||
// HOST: setup.end:
|
||||
// HOST-NEXT: ret void
|
||||
|
||||
// CUDA-LABEL: @_Z18__device_stub__barv(
|
||||
// CUDA-NEXT: entry:
|
||||
// CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__barv)
|
||||
// CUDA-NEXT: br label [[SETUP_END:%.*]]
|
||||
// CUDA: setup.end:
|
||||
// CUDA-NEXT: ret void
|
||||
//
|
||||
// HIP-LABEL: @_Z18__device_stub__barv(
|
||||
// HIP-NEXT: entry:
|
||||
// HIP-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3barv)
|
||||
// HIP-NEXT: br label [[SETUP_END:%.*]]
|
||||
// HIP: setup.end:
|
||||
// HIP-NEXT: ret void
|
||||
//
|
||||
__global__ void bar() {}
|
||||
__device__ int x = 1;
|
||||
|
|
Loading…
Reference in New Issue