forked from OSchip/llvm-project
[HIP] Emit kernel symbol
Currently clang uses stub function to launch kernel. This is inconvenient to interop with C++ programs since the stub function has different name as kernel, which is required by ROCm debugger. This patch emits a variable symbol which has the same name as the kernel and uses it to register and launch the kernel. This allows C++ program to launch a kernel by using the original kernel name. Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D86376
This commit is contained in:
parent
154c47dc06
commit
5cf2a37f12
|
@ -42,12 +42,18 @@ private:
|
|||
llvm::LLVMContext &Context;
|
||||
/// Convenience reference to the current module
|
||||
llvm::Module &TheModule;
|
||||
/// Keeps track of kernel launch stubs emitted in this module
|
||||
/// Keeps track of kernel launch stubs and handles emitted in this module
|
||||
struct KernelInfo {
|
||||
llvm::Function *Kernel;
|
||||
llvm::Function *Kernel; // stub function to help launch kernel
|
||||
const Decl *D;
|
||||
};
|
||||
llvm::SmallVector<KernelInfo, 16> EmittedKernels;
|
||||
// Map a device stub function to a symbol for identifying kernel in host code.
|
||||
// For CUDA, the symbol for identifying the kernel is the same as the device
|
||||
// stub function. For HIP, they are different.
|
||||
llvm::DenseMap<llvm::Function *, llvm::GlobalValue *> KernelHandles;
|
||||
// Map a kernel handle to the kernel stub.
|
||||
llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs;
|
||||
struct VarInfo {
|
||||
llvm::GlobalVariable *Var;
|
||||
const VarDecl *D;
|
||||
|
@ -154,6 +160,12 @@ private:
|
|||
public:
|
||||
CGNVCUDARuntime(CodeGenModule &CGM);
|
||||
|
||||
llvm::GlobalValue *getKernelHandle(llvm::Function *F, GlobalDecl GD) override;
|
||||
llvm::Function *getKernelStub(llvm::GlobalValue *Handle) override {
|
||||
auto Loc = KernelStubs.find(Handle);
|
||||
assert(Loc != KernelStubs.end());
|
||||
return Loc->second;
|
||||
}
|
||||
void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
|
||||
void handleVarRegistration(const VarDecl *VD,
|
||||
llvm::GlobalVariable &Var) override;
|
||||
|
@ -272,6 +284,10 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
|
|||
void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
|
||||
FunctionArgList &Args) {
|
||||
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
|
||||
if (auto *GV = dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn])) {
|
||||
GV->setLinkage(CGF.CurFn->getLinkage());
|
||||
GV->setInitializer(CGF.CurFn);
|
||||
}
|
||||
if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
|
||||
CudaFeature::CUDA_USES_NEW_LAUNCH) ||
|
||||
(CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI))
|
||||
|
@ -350,7 +366,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
|
|||
ShmemSize.getPointer(), Stream.getPointer()});
|
||||
|
||||
// Emit the call to cudaLaunch
|
||||
llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy);
|
||||
llvm::Value *Kernel =
|
||||
CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy);
|
||||
CallArgList LaunchKernelArgs;
|
||||
LaunchKernelArgs.add(RValue::get(Kernel),
|
||||
cudaLaunchKernelFD->getParamDecl(0)->getType());
|
||||
|
@ -405,7 +422,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
|
|||
|
||||
// Emit the call to cudaLaunch
|
||||
llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
|
||||
llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
|
||||
llvm::Value *Arg =
|
||||
CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy);
|
||||
CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
|
||||
CGF.EmitBranch(EndBlock);
|
||||
|
||||
|
@ -499,7 +517,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
|
|||
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
|
||||
llvm::Value *Args[] = {
|
||||
&GpuBinaryHandlePtr,
|
||||
Builder.CreateBitCast(I.Kernel, VoidPtrTy),
|
||||
Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy),
|
||||
KernelName,
|
||||
KernelName,
|
||||
llvm::ConstantInt::get(IntTy, -1),
|
||||
|
@ -1070,3 +1088,28 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() {
|
|||
}
|
||||
return makeModuleCtorFunction();
|
||||
}
|
||||
|
||||
llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
|
||||
GlobalDecl GD) {
|
||||
auto Loc = KernelHandles.find(F);
|
||||
if (Loc != KernelHandles.end())
|
||||
return Loc->second;
|
||||
|
||||
if (!CGM.getLangOpts().HIP) {
|
||||
KernelHandles[F] = F;
|
||||
KernelStubs[F] = F;
|
||||
return F;
|
||||
}
|
||||
|
||||
auto *Var = new llvm::GlobalVariable(
|
||||
TheModule, F->getType(), /*isConstant=*/true, F->getLinkage(),
|
||||
/*Initializer=*/nullptr,
|
||||
CGM.getMangledName(
|
||||
GD.getWithKernelReferenceKind(KernelReferenceKind::Kernel)));
|
||||
Var->setAlignment(CGM.getPointerAlign().getAsAlign());
|
||||
Var->setDSOLocal(F->isDSOLocal());
|
||||
Var->setVisibility(F->getVisibility());
|
||||
KernelHandles[F] = Var;
|
||||
KernelStubs[Var] = F;
|
||||
return Var;
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#ifndef LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
|
||||
#define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
|
||||
|
||||
#include "clang/AST/GlobalDecl.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
|
||||
|
@ -94,6 +95,13 @@ public:
|
|||
/// compilation is for host.
|
||||
virtual std::string getDeviceSideName(const NamedDecl *ND) = 0;
|
||||
|
||||
/// Get kernel handle by stub function.
|
||||
virtual llvm::GlobalValue *getKernelHandle(llvm::Function *Stub,
|
||||
GlobalDecl GD) = 0;
|
||||
|
||||
/// Get kernel stub by kernel handle.
|
||||
virtual llvm::Function *getKernelStub(llvm::GlobalValue *Handle) = 0;
|
||||
|
||||
/// Adjust linkage of shadow variables in host compilation.
|
||||
virtual void
|
||||
internalizeDeviceSideVar(const VarDecl *D,
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "CGCUDARuntime.h"
|
||||
#include "CGCXXABI.h"
|
||||
#include "CGCall.h"
|
||||
#include "CGCleanup.h"
|
||||
|
@ -4871,8 +4872,12 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
|
|||
return CGCallee::forBuiltin(builtinID, FD);
|
||||
}
|
||||
|
||||
llvm::Constant *calleePtr = EmitFunctionDeclPointer(CGF.CGM, GD);
|
||||
return CGCallee::forDirect(calleePtr, GD);
|
||||
llvm::Constant *CalleePtr = EmitFunctionDeclPointer(CGF.CGM, GD);
|
||||
if (CGF.CGM.getLangOpts().CUDA && !CGF.CGM.getLangOpts().CUDAIsDevice &&
|
||||
FD->hasAttr<CUDAGlobalAttr>())
|
||||
CalleePtr = CGF.CGM.getCUDARuntime().getKernelStub(
|
||||
cast<llvm::GlobalValue>(CalleePtr->stripPointerCasts()));
|
||||
return CGCallee::forDirect(CalleePtr, GD);
|
||||
}
|
||||
|
||||
CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
|
||||
|
@ -5266,6 +5271,19 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
|
|||
Callee.setFunctionPointer(CalleePtr);
|
||||
}
|
||||
|
||||
// HIP function pointer contains kernel handle when it is used in triple
|
||||
// chevron. The kernel stub needs to be loaded from kernel handle and used
|
||||
// as callee.
|
||||
if (CGM.getLangOpts().HIP && !CGM.getLangOpts().CUDAIsDevice &&
|
||||
isa<CUDAKernelCallExpr>(E) &&
|
||||
(!TargetDecl || !isa<FunctionDecl>(TargetDecl))) {
|
||||
llvm::Value *Handle = Callee.getFunctionPointer();
|
||||
Handle->dump();
|
||||
auto *Cast =
|
||||
Builder.CreateBitCast(Handle, Handle->getType()->getPointerTo());
|
||||
auto *Stub = Builder.CreateLoad(Address(Cast, CGM.getPointerAlign()));
|
||||
Callee.setFunctionPointer(Stub);
|
||||
}
|
||||
llvm::CallBase *CallOrInvoke = nullptr;
|
||||
RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &CallOrInvoke,
|
||||
E->getExprLoc());
|
||||
|
|
|
@ -3571,9 +3571,19 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD,
|
|||
}
|
||||
|
||||
StringRef MangledName = getMangledName(GD);
|
||||
return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer,
|
||||
/*IsThunk=*/false, llvm::AttributeList(),
|
||||
IsForDefinition);
|
||||
auto *F = GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer,
|
||||
/*IsThunk=*/false, llvm::AttributeList(),
|
||||
IsForDefinition);
|
||||
// Returns kernel handle for HIP kernel stub function.
|
||||
if (LangOpts.CUDA && !LangOpts.CUDAIsDevice &&
|
||||
cast<FunctionDecl>(GD.getDecl())->hasAttr<CUDAGlobalAttr>()) {
|
||||
auto *Handle = getCUDARuntime().getKernelHandle(
|
||||
cast<llvm::Function>(F->stripPointerCasts()), GD);
|
||||
if (IsForDefinition)
|
||||
return F;
|
||||
return llvm::ConstantExpr::getBitCast(Handle, Ty->getPointerTo());
|
||||
}
|
||||
return F;
|
||||
}
|
||||
|
||||
static const FunctionDecl *
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include <stddef.h>
|
||||
|
||||
#if __HIP__ || __CUDA__
|
||||
#define __constant__ __attribute__((constant))
|
||||
#define __device__ __attribute__((device))
|
||||
#define __global__ __attribute__((global))
|
||||
|
@ -11,13 +12,22 @@
|
|||
#define __managed__ __attribute__((managed))
|
||||
#endif
|
||||
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
|
||||
#else
|
||||
#define __constant__
|
||||
#define __device__
|
||||
#define __global__
|
||||
#define __host__
|
||||
#define __shared__
|
||||
#define __managed__
|
||||
#define __launch_bounds__(...)
|
||||
#endif
|
||||
|
||||
struct dim3 {
|
||||
unsigned x, y, z;
|
||||
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
|
||||
};
|
||||
|
||||
#ifdef __HIP__
|
||||
#if __HIP__ || HIP_PLATFORM
|
||||
typedef struct hipStream *hipStream_t;
|
||||
typedef enum hipError {} hipError_t;
|
||||
int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
// RUN: %clang_cc1 -x hip -emit-llvm-bc %s -o %t.hip.bc
|
||||
// RUN: %clang_cc1 -mlink-bitcode-file %t.hip.bc -DHIP_PLATFORM -emit-llvm \
|
||||
// RUN: %s -o - | FileCheck %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// CHECK: @_Z2g1i = constant void (i32)* @_Z17__device_stub__g1i, align 8
|
||||
#if __HIP__
|
||||
__global__ void g1(int x) {}
|
||||
#else
|
||||
extern void g1(int x);
|
||||
|
||||
// CHECK: call i32 @hipLaunchKernel{{.*}}@_Z2g1i
|
||||
void test() {
|
||||
hipLaunchKernel((void*)g1, 1, 1, nullptr, 0, 0);
|
||||
}
|
||||
|
||||
// CHECK: __hipRegisterFunction{{.*}}@_Z2g1i
|
||||
#endif
|
|
@ -30,6 +30,9 @@ extern "C" __global__ void ckernel(int *a) {
|
|||
*a = 1;
|
||||
}
|
||||
|
||||
// Kernel symbol for launching kernel.
|
||||
// CHECK: @[[SYM:ckernel]] = constant void (i32*)* @__device_stub__ckernel, align 8
|
||||
|
||||
// Device side kernel names
|
||||
// CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00"
|
||||
|
||||
|
@ -40,7 +43,7 @@ extern "C" __global__ void ckernel(int *a) {
|
|||
// Make sure there is no !dbg between function attributes and '{'
|
||||
// CHECK: define{{.*}} void @[[CSTUB:__device_stub__ckernel]]{{.*}} #{{[0-9]+}} {
|
||||
// CHECK-NOT: call {{.*}}@hipLaunchByPtr{{.*}}!dbg
|
||||
// CHECK: call {{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
|
||||
// CHECK: call {{.*}}@hipLaunchByPtr{{.*}}@[[SYM]]
|
||||
// CHECK-NOT: ret {{.*}}!dbg
|
||||
|
||||
// CHECK-LABEL: define {{.*}}@_Z8hostfuncPi{{.*}}!dbg
|
||||
|
|
|
@ -2,10 +2,17 @@
|
|||
|
||||
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
|
||||
// RUN: -fcuda-include-gpubinary %t -o - -x hip\
|
||||
// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CHECK
|
||||
// RUN: | FileCheck %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// Kernel handles
|
||||
|
||||
// CHECK: @[[HCKERN:ckernel]] = constant void ()* @__device_stub__ckernel, align 8
|
||||
// CHECK: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant void ()* @_ZN2ns23__device_stub__nskernelEv, align 8
|
||||
// CHECK: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant void ()* @_Z25__device_stub__kernelfuncIiEvv, align 8
|
||||
// CHECK: @[[HDKERN:_Z11kernel_declv]] = external constant void ()*, align 8
|
||||
|
||||
extern "C" __global__ void ckernel() {}
|
||||
|
||||
namespace ns {
|
||||
|
@ -17,6 +24,11 @@ __global__ void kernelfunc() {}
|
|||
|
||||
__global__ void kernel_decl();
|
||||
|
||||
void (*kernel_ptr)();
|
||||
void *void_ptr;
|
||||
|
||||
void launch(void *kern);
|
||||
|
||||
// Device side kernel names
|
||||
|
||||
// CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00"
|
||||
|
@ -26,16 +38,20 @@ __global__ void kernel_decl();
|
|||
// Non-template kernel stub functions
|
||||
|
||||
// CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]]
|
||||
// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
|
||||
// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]]
|
||||
// CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]]
|
||||
// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]]
|
||||
// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
|
||||
|
||||
// CHECK-LABEL: define{{.*}}@_Z8hostfuncv()
|
||||
|
||||
// Check kernel stub is used for triple chevron
|
||||
|
||||
// CHECK-LABEL: define{{.*}}@_Z4fun1v()
|
||||
// CHECK: call void @[[CSTUB]]()
|
||||
// CHECK: call void @[[NSSTUB]]()
|
||||
// CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]()
|
||||
// CHECK: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]()
|
||||
void hostfunc(void) {
|
||||
|
||||
void fun1(void) {
|
||||
ckernel<<<1, 1>>>();
|
||||
ns::nskernel<<<1, 1>>>();
|
||||
kernelfunc<int><<<1, 1>>>();
|
||||
|
@ -45,11 +61,69 @@ void hostfunc(void) {
|
|||
// Template kernel stub functions
|
||||
|
||||
// CHECK: define{{.*}}@[[TSTUB]]
|
||||
// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]]
|
||||
// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]]
|
||||
|
||||
// Check declaration of stub function for external kernel.
|
||||
|
||||
// CHECK: declare{{.*}}@[[DSTUB]]
|
||||
|
||||
// Check kernel handle is used for passing the kernel as a function pointer
|
||||
|
||||
// CHECK-LABEL: define{{.*}}@_Z4fun2v()
|
||||
// CHECK: call void @_Z6launchPv({{.*}}[[HCKERN]]
|
||||
// CHECK: call void @_Z6launchPv({{.*}}[[HNSKERN]]
|
||||
// CHECK: call void @_Z6launchPv({{.*}}[[HTKERN]]
|
||||
// CHECK: call void @_Z6launchPv({{.*}}[[HDKERN]]
|
||||
void fun2() {
|
||||
launch((void *)ckernel);
|
||||
launch((void *)ns::nskernel);
|
||||
launch((void *)kernelfunc<int>);
|
||||
launch((void *)kernel_decl);
|
||||
}
|
||||
|
||||
// Check kernel handle is used for assigning a kernel to a function pointer
|
||||
|
||||
// CHECK-LABEL: define{{.*}}@_Z4fun3v()
|
||||
// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8
|
||||
// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8
|
||||
// CHECK: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8
|
||||
// CHECK: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8
|
||||
void fun3() {
|
||||
kernel_ptr = ckernel;
|
||||
kernel_ptr = &ckernel;
|
||||
void_ptr = (void *)ckernel;
|
||||
void_ptr = (void *)&ckernel;
|
||||
}
|
||||
|
||||
// Check kernel stub is loaded from kernel handle when function pointer is
|
||||
// used with triple chevron
|
||||
|
||||
// CHECK-LABEL: define{{.*}}@_Z4fun4v()
|
||||
// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr
|
||||
// CHECK: call i32 @_Z16hipConfigureCall4dim3S_mP9hipStream
|
||||
// CHECK: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8
|
||||
// CHECK: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to void ()**
|
||||
// CHECK: %[[STUB:.*]] = load void ()*, void ()** %[[CAST]], align 8
|
||||
// CHECK: call void %[[STUB]]()
|
||||
void fun4() {
|
||||
kernel_ptr = ckernel;
|
||||
kernel_ptr<<<1,1>>>();
|
||||
}
|
||||
|
||||
// Check kernel handle is passed to a function
|
||||
|
||||
// CHECK-LABEL: define{{.*}}@_Z4fun5v()
|
||||
// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr
|
||||
// CHECK: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8
|
||||
// CHECK: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to i8*
|
||||
// CHECK: call void @_Z6launchPv(i8* %[[CAST]])
|
||||
void fun5() {
|
||||
kernel_ptr = ckernel;
|
||||
launch((void *)kernel_ptr);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}}@__hip_register_globals
|
||||
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]]
|
||||
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]]
|
||||
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]]
|
||||
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]]
|
||||
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]]
|
||||
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]]
|
||||
// CHECK-NOT: call{{.*}}@__hipRegisterFunction{{.*}}@[[HDKERN]]{{.*}}@[[DKERN]]
|
||||
|
|
|
@ -54,7 +54,7 @@ void f1(float *p) {
|
|||
[] __device__ (float x) { return x + 5.f; });
|
||||
}
|
||||
// HOST: @__hip_register_globals
|
||||
// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
|
||||
// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
|
||||
// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
|
||||
// HOST: __hipRegisterFunction{{.*}}@_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
|
||||
// MSVC: __hipRegisterFunction{{.*}}@"??$k0@V<lambda_1>@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV<lambda_1>@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
|
||||
// MSVC: __hipRegisterFunction{{.*}}@"??$k1@V<lambda_2>@?0??f1@@YAXPEAM@Z@V<lambda_3>@?0??2@YAX0@Z@V<lambda_4>@?0??2@YAX0@Z@@@YAXPEAMV<lambda_2>@?0??f1@@YAX0@Z@V<lambda_3>@?0??1@YAX0@Z@V<lambda_4>@?0??1@YAX0@Z@@Z{{.*}}@1
|
||||
|
|
Loading…
Reference in New Issue