[AMDGPU] replace hostcall module flag with function attribute

The module flag to indicate use of hostcall is insufficient to catch
all cases where hostcall might be in use by a kernel. This is now
replaced by a function attribute that gets propagated to top-level
kernel functions via their respective call-graph.

If the attribute "amdgpu-no-hostcall-ptr" is absent on a kernel, the
default behaviour is to emit kernel metadata indicating that the
kernel uses the hostcall buffer pointer passed as an implicit
argument.

The attribute may be placed explicitly by the user, or inferred by the
AMDGPU attributor by examining the call-graph. The attribute is
inferred only if the function is not being sanitized, and the
implictarg_ptr does not result in a load of any byte in the hostcall
pointer argument.

Reviewed By: jdoerfert, arsenm, kpyzhov

Differential Revision: https://reviews.llvm.org/D119216
This commit is contained in:
Sameer Sahasrabuddhe 2022-02-11 10:43:41 +05:30
parent dcb2da13f1
commit d8f99bb6e0
38 changed files with 758 additions and 325 deletions

View File

@ -566,9 +566,6 @@ void CodeGenModule::Release() {
"__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
llvm::GlobalVariable::NotThreadLocal);
addCompilerUsedGlobal(Var);
if (!getModule().getModuleFlag("amdgpu_hostcall")) {
getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
}
}
// Emit amdgpu_code_object_version module flag, which is code object version
// times 100.

View File

@ -1,17 +0,0 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
// RUN: -O3 -x hip | FileCheck -check-prefixes=MFCHECK %s
// MFCHECK: !{{.*}} = !{i32 4, !"amdgpu_hostcall", i32 1}
// Test to check hostcall module flag metadata is generated correctly
// when a program has printf call and compiled with -fsanitize=address.
#include "Inputs/cuda.h"
__device__ void non_kernel() {
printf("sanitized device function");
}
__global__ void kernel() {
non_kernel();
}

View File

@ -9,12 +9,12 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
// RUN: -mlink-bitcode-file %t.asanrtl.bc -x hip \
// RUN: | FileCheck -check-prefixes=ASAN,MFCHECK %s
// RUN: | FileCheck -check-prefixes=ASAN %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
// RUN: -O3 -mlink-bitcode-file %t.asanrtl.bc -x hip \
// RUN: | FileCheck -check-prefixes=ASAN,MFCHECK %s
// RUN: | FileCheck -check-prefixes=ASAN %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -x hip \
@ -27,7 +27,5 @@
// ASAN-DAG: @llvm.compiler.used = {{.*}}@__amdgpu_device_library_preserve_asan_functions_ptr
// ASAN-DAG: define weak void @__asan_report_load1(i64 %{{.*}})
// MFCHECK: !{{.*}} = !{i32 4, !"amdgpu_hostcall", i32 1}
// CHECK-NOT: @__amdgpu_device_library_preserve_asan_functions
// CHECK-NOT: @__asan_report_load1

View File

@ -18,6 +18,7 @@ AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")
AMDGPU_ATTRIBUTE(QUEUE_PTR, "amdgpu-no-queue-ptr")
AMDGPU_ATTRIBUTE(DISPATCH_ID, "amdgpu-no-dispatch-id")
AMDGPU_ATTRIBUTE(IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr")
AMDGPU_ATTRIBUTE(HOSTCALL_PTR, "amdgpu-no-hostcall-ptr")
AMDGPU_ATTRIBUTE(WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x")
AMDGPU_ATTRIBUTE(WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y")
AMDGPU_ATTRIBUTE(WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z")

View File

@ -12,6 +12,7 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
@ -102,7 +103,7 @@ static bool isDSAddress(const Constant *C) {
/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresImplicitArgPtr(const Function &F) {
static bool funcRequiresHostcallPtr(const Function &F) {
// Sanitizers require the hostcall buffer passed in the implicit arguments.
return F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeThread) ||
@ -341,12 +342,15 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// If the function requires the implicit arg pointer due to sanitizers,
// assume it's needed even if explicitly marked as not requiring it.
const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F);
if (NeedsImplicit)
const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
if (NeedsHostcall) {
removeAssumedBits(IMPLICIT_ARG_PTR);
removeAssumedBits(HOSTCALL_PTR);
}
for (auto Attr : ImplicitAttrs) {
if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR)
if (NeedsHostcall &&
(Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
continue;
if (F->hasFnAttribute(Attr.second))
@ -405,6 +409,11 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
removeAssumedBits(QUEUE_PTR);
}
if (funcRetrievesHostcallPtr(A)) {
removeAssumedBits(IMPLICIT_ARG_PTR);
removeAssumedBits(HOSTCALL_PTR);
}
return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
: ChangeStatus::UNCHANGED;
}
@ -486,6 +495,35 @@ private:
return false;
}
bool funcRetrievesHostcallPtr(Attributor &A) {
auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
// Check if this is a call to the implicitarg_ptr builtin and it
// is used to retrieve the hostcall pointer. The implicit arg for
// hostcall is not used only if every use of the implicitarg_ptr
// is a load that clearly does not retrieve any byte of the
// hostcall pointer. We check this by tracing all the uses of the
// initial call to the implicitarg_ptr intrinsic.
auto DoesNotLeadToHostcallPtr = [&](Instruction &I) {
auto &Call = cast<CallBase>(I);
if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
return true;
const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
*this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
AAPointerInfo::OffsetAndSize OAS(Pos, 8);
return PointerInfoAA.forallInterferingAccesses(
OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
return Acc.getRemoteInst()->isDroppable();
});
};
bool UsedAssumedInformation = false;
return !A.checkForAllCallLikeInstructions(DoesNotLeadToHostcallPtr, *this,
UsedAssumedInformation);
}
};
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
@ -638,7 +676,7 @@ public:
AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
DenseSet<const char *> Allowed(
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
&AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});
&AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID});
Attributor A(Functions, InfoCache, CGUpdater, &Allowed);

View File

@ -405,7 +405,7 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
if (HiddenArgNumBytes >= 32) {
if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
// The printf runtime binding pass should have ensured that hostcall and
// printf are not used in the same module.
assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
@ -794,6 +794,7 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
msgpack::ArrayDocNode Args) {
auto &Func = MF.getFunction();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
if (!HiddenArgNumBytes)
@ -822,7 +823,7 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
if (M->getNamedMetadata("llvm.printf.fmts"))
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
Args);
else if (M->getModuleFlag("amdgpu_hostcall")) {
else if (MFI.hasHostcallPtr()) {
// The printf runtime binding pass should have ensured that hostcall and
// printf are not used in the same module.
assert(!M->getNamedMetadata("llvm.printf.fmts"));
@ -973,6 +974,7 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const Module *M = Func.getParent();
auto &DL = M->getDataLayout();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
auto Int64Ty = Type::getInt64Ty(Func.getContext());
auto Int32Ty = Type::getInt32Ty(Func.getContext());
@ -1011,7 +1013,7 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
} else
Offset += 8; // Skipped.
if (M->getModuleFlag("amdgpu_hostcall")) {
if (MFI.hasHostcallPtr()) {
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
Args);
} else
@ -1041,7 +1043,6 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
} else
Offset += 8; // Skipped.
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
if (MFI.hasQueuePtr())
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
}

View File

@ -47,6 +47,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkItemIDZ(false),
ImplicitBufferPtr(false),
ImplicitArgPtr(false),
HostcallPtr(false),
GITPtrHigh(0xffffffff),
HighBitsOf32BitAddress(0),
GDSSize(0) {
@ -141,6 +142,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
DispatchID = true;
if (!F.hasFnAttribute("amdgpu-no-hostcall-ptr"))
HostcallPtr = true;
}
// FIXME: This attribute is a hack, we just need an analysis on the function

View File

@ -421,6 +421,7 @@ private:
// Pointer to where the ABI inserts special kernel arguments separate from the
// user arguments. This is an offset from the KernargSegmentPtr.
bool ImplicitArgPtr : 1;
bool HostcallPtr : 1;
bool MayNeedAGPRs : 1;
@ -696,6 +697,10 @@ public:
return ImplicitArgPtr;
}
bool hasHostcallPtr() const {
return HostcallPtr;
}
bool hasImplicitBufferPtr() const {
return ImplicitBufferPtr;
}

View File

@ -136,6 +136,22 @@ bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
isHsaAbiVersion5(STI);
}
// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition() {
switch (AmdhsaCodeObjectVersion) {
case 2:
case 3:
case 4:
return 24;
case 5:
return 80;
default:
llvm_unreachable("Unexpected code object version");
return 0;
}
}
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL

View File

@ -54,6 +54,9 @@ bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
/// false otherwise.
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
unsigned getHostcallImplicitArgPosition();
struct GcnBufferFormatInfo {
unsigned Format;
unsigned BitsPerComp;

View File

@ -50,9 +50,6 @@ static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
auto Int64Ty = Builder.getInt64Ty();
auto M = Builder.GetInsertBlock()->getModule();
auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
if (!M->getModuleFlag("amdgpu_hostcall")) {
M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
}
return Builder.CreateCall(Fn, Version);
}

View File

@ -230,6 +230,6 @@ attributes #1 = { nounwind }
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.

View File

@ -837,7 +837,7 @@ define float @func_other_intrinsic_call(float %arg) #3 {
ret float %fadd
}
; Implicit arguments need to be enabled for sanitizers
; Hostcall needs to be enabled for sanitizers
define amdgpu_kernel void @kern_sanitize_address() #4 {
; AKF_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
; AKF_HSA-SAME: () #[[ATTR5:[0-9]+]] {
@ -853,7 +853,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 {
ret void
}
; Implicit arguments need to be enabled for sanitizers
; Hostcall needs to be enabled for sanitizers
define void @func_sanitize_address() #4 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
; AKF_HSA-SAME: () #[[ATTR5]] {
@ -869,7 +869,7 @@ define void @func_sanitize_address() #4 {
ret void
}
; Implicit arguments need to be enabled for sanitizers
; Hostcall needs to be enabled for sanitizers
define void @func_indirect_sanitize_address() #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
; AKF_HSA-SAME: () #[[ATTR3]] {
@ -885,7 +885,7 @@ define void @func_indirect_sanitize_address() #3 {
ret void
}
; Implicit arguments need to be enabled for sanitizers
; Hostcall needs to be enabled for sanitizers
define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
; AKF_HSA-SAME: () #[[ATTR4]] {
@ -937,22 +937,22 @@ attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }

View File

@ -647,15 +647,15 @@ attributes #1 = { nounwind }
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.

View File

@ -418,13 +418,13 @@ attributes #1 = { nounwind }
; AKF_CHECK: attributes #[[ATTR1]] = { nounwind }
;.
; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
;.

View File

@ -35,6 +35,6 @@ define amdgpu_kernel void @test_direct_indirect_call() {
ret void
}
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.

View File

@ -42,6 +42,6 @@ attributes #0 = { "amdgpu-no-dispatch-id" }
;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
;.

View File

@ -50,7 +50,7 @@ define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 32
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_none
; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 40
; CHECK-NEXT: .size: 8

View File

@ -26,6 +26,9 @@
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NOT: ValueKind: HiddenDefaultQueue
; CHECK-NOT: ValueKind: HiddenCompletionAction
define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0
@ -56,7 +59,7 @@ define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenNone
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8

View File

@ -171,7 +171,7 @@ entry:
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 48
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_none
; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
; CHECK: .name: test32
; CHECK: .symbol: test32.kd
define amdgpu_kernel void @test32(
@ -214,7 +214,7 @@ entry:
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 48
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_none
; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 56
; CHECK-NEXT: .size: 8
@ -265,7 +265,7 @@ entry:
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 48
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_none
; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 56
; CHECK-NEXT: .size: 8

View File

@ -112,10 +112,8 @@ entry:
ret void
}
!llvm.module.flags = !{!0}
!llvm.printf.fmts = !{!1, !2}
!0 = !{i32 1, !"amdgpu_hostcall", i32 1}
!1 = !{!"1:1:4:%d\5Cn"}
!2 = !{!"2:1:8:%g\5Cn"}

View File

@ -177,7 +177,7 @@ entry:
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenNone
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: CodeProps:
define amdgpu_kernel void @test32(
@ -221,7 +221,7 @@ entry:
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenNone
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
@ -273,7 +273,7 @@ entry:
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenNone
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8

View File

@ -1,51 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK: ---
; CHECK: amdhsa.kernels:
; CHECK: - .args:
; CHECK-NEXT: - .name: a
; CHECK-NEXT: .offset: 0
; CHECK-NEXT: .size: 1
; CHECK-NEXT: .type_name: char
; CHECK-NEXT: .value_kind: by_value
; CHECK-NEXT: - .offset: 8
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_global_offset_x
; CHECK-NEXT: - .offset: 16
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_global_offset_y
; CHECK-NEXT: - .offset: 24
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_global_offset_z
; CHECK-NOT: .value_kind: hidden_hostcall_buffer
; CHECK: .language: OpenCL C
; CHECK-NEXT: .language_version:
; CHECK-NEXT: - 2
; CHECK-NEXT: - 0
; CHECK: .name: test_kernel
; CHECK: .symbol: test_kernel.kd
define amdgpu_kernel void @test_kernel(i8 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
ret void
}
; CHECK: amdhsa.version:
; CHECK-NEXT: - 1
; CHECK-NEXT: - 0
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
!1 = !{i32 0}
!2 = !{!"none"}
!3 = !{!"char"}
!4 = !{!""}
!opencl.ocl.version = !{!90}
!90 = !{i32 2, i32 0}
; PARSER: AMDGPU HSA Metadata Parser Test: PASS

View File

@ -1,48 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
; CHECK: Kernels:
; CHECK: - Name: test_kernel
; CHECK-NEXT: SymbolName: 'test_kernel@kd'
; CHECK-NEXT: Language: OpenCL C
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
; CHECK-NEXT: TypeName: char
; CHECK-NEXT: Size: 1
; CHECK-NEXT: Align: 1
; CHECK-NEXT: ValueKind: ByValue
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NOT: ValueKind: HiddenHostcallBuffer
; CHECK-NOT: ValueKind: HiddenDefaultQueue
; CHECK-NOT: ValueKind: HiddenCompletionAction
define amdgpu_kernel void @test_kernel(i8 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
ret void
}
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
!1 = !{i32 0}
!2 = !{!"none"}
!3 = !{!"char"}
!4 = !{!""}
!opencl.ocl.version = !{!90}
!90 = !{i32 2, i32 0}
; PARSER: AMDGPU HSA Metadata Parser Test: PASS

View File

@ -48,7 +48,4 @@ attributes #0 = { sanitize_address "amdgpu-implicitarg-num-bytes"="48" }
!opencl.ocl.version = !{!90}
!90 = !{i32 2, i32 0}
!llvm.module.flags = !{!0}
!0 = !{i32 4, !"amdgpu_hostcall", i32 1}
; CHECK: AMDGPU HSA Metadata Parser Test: PASS

View File

@ -1,55 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK: ---
; CHECK: amdhsa.kernels:
; CHECK: - .args:
; CHECK-NEXT: - .name: a
; CHECK-NEXT: .offset: 0
; CHECK-NEXT: .size: 1
; CHECK-NEXT: .type_name: char
; CHECK-NEXT: .value_kind: by_value
; CHECK-NEXT: - .offset: 8
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_global_offset_x
; CHECK-NEXT: - .offset: 16
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_global_offset_y
; CHECK-NEXT: - .offset: 24
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_global_offset_z
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 32
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
; CHECK: .language: OpenCL C
; CHECK-NEXT: .language_version:
; CHECK-NEXT: - 2
; CHECK-NEXT: - 0
; CHECK: .name: test_kernel
; CHECK: .symbol: test_kernel.kd
define amdgpu_kernel void @test_kernel(i8 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
ret void
}
; CHECK: amdhsa.version:
; CHECK-NEXT: - 1
; CHECK-NEXT: - 0
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
!1 = !{i32 0}
!2 = !{!"none"}
!3 = !{!"char"}
!4 = !{!""}
!opencl.ocl.version = !{!90}
!90 = !{i32 2, i32 0}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_hostcall", i32 1}
; PARSER: AMDGPU HSA Metadata Parser Test: PASS

View File

@ -1,53 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
; CHECK: Kernels:
; CHECK: - Name: test_kernel
; CHECK-NEXT: SymbolName: 'test_kernel@kd'
; CHECK-NEXT: Language: OpenCL C
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
; CHECK-NEXT: TypeName: char
; CHECK-NEXT: Size: 1
; CHECK-NEXT: Align: 1
; CHECK-NEXT: ValueKind: ByValue
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NOT: ValueKind: HiddenDefaultQueue
; CHECK-NOT: ValueKind: HiddenCompletionAction
declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64)
define amdgpu_kernel void @test_kernel(i8 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
ret void
}
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
!1 = !{i32 0}
!2 = !{!"none"}
!3 = !{!"char"}
!4 = !{!""}
!opencl.ocl.version = !{!90}
!90 = !{i32 2, i32 0}
; PARSER: AMDGPU HSA Metadata Parser Test: PASS

View File

@ -0,0 +1,303 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=CHECK %s
declare void @function1()
declare void @function2() #0
; Function Attrs: noinline
define void @function3(i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink) #4 {
store i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink, align 8
ret void
}
; Function Attrs: noinline
define void @function4(i64 %arg, i64* %a) #4 {
store i64 %arg, i64* %a
ret void
}
; Function Attrs: noinline
define void @function5(i8 addrspace(4)* %ptr, i64* %sink) #4 {
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 8
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
%x = load i64, i64 addrspace(4)* %cast
store i64 %x, i64* %sink
ret void
}
; Function Attrs: nounwind readnone speculatable willreturn
declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
; CHECK: amdhsa.kernels:
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel10
define amdgpu_kernel void @test_kernel10(i8* %a) #2 {
store i8 3, i8* %a, align 1
ret void
}
; Call to an extern function
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel20
define amdgpu_kernel void @test_kernel20(i8* %a) #2 {
call void @function1()
store i8 3, i8* %a, align 1
ret void
}
; Explicit attribute on kernel
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel21
define amdgpu_kernel void @test_kernel21(i8* %a) #3 {
call void @function1()
store i8 3, i8* %a, align 1
ret void
}
; Explicit attribute on extern callee
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel22
define amdgpu_kernel void @test_kernel22(i8* %a) #2 {
call void @function2()
store i8 3, i8* %a, align 1
ret void
}
; Access more bytes than the pointer size
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel30
define amdgpu_kernel void @test_kernel30(i128* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
%cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)*
%x = load i128, i128 addrspace(4)* %cast
store i128 %x, i128* %a
ret void
}
; Typical load of hostcall buffer pointer
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel40
define amdgpu_kernel void @test_kernel40(i64* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
%x = load i64, i64 addrspace(4)* %cast
store i64 %x, i64* %a
ret void
}
; Typical usage, overriden by explicit attribute on kernel
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel41
define amdgpu_kernel void @test_kernel41(i64* %a) #3 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
%x = load i64, i64 addrspace(4)* %cast
store i64 %x, i64* %a
ret void
}
; Access to implicit arg before the hostcall pointer
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel42
define amdgpu_kernel void @test_kernel42(i64* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
%x = load i64, i64 addrspace(4)* %cast
store i64 %x, i64* %a
ret void
}
; Access to implicit arg after the hostcall pointer
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel43
define amdgpu_kernel void @test_kernel43(i64* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
%x = load i64, i64 addrspace(4)* %cast
store i64 %x, i64* %a
ret void
}
; Accessing a byte just before the hostcall pointer
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel44
define amdgpu_kernel void @test_kernel44(i8* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 23
%x = load i8, i8 addrspace(4)* %gep, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Accessing a byte inside the hostcall pointer
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel45
define amdgpu_kernel void @test_kernel45(i8* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
%x = load i8, i8 addrspace(4)* %gep, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Accessing a byte inside the hostcall pointer
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel46
define amdgpu_kernel void @test_kernel46(i8* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 31
%x = load i8, i8 addrspace(4)* %gep, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Accessing a byte just after the hostcall pointer
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel47
define amdgpu_kernel void @test_kernel47(i8* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32
%x = load i8, i8 addrspace(4)* %gep, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Access with an unknown offset
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel50
define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b
%x = load i8, i8 addrspace(4)* %gep, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Multiple geps reaching the hostcall pointer argument.
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel51
define amdgpu_kernel void @test_kernel51(i8* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
%gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 8
%x = load i8, i8 addrspace(4)* %gep2, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Multiple geps not reaching the hostcall pointer argument.
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel52
define amdgpu_kernel void @test_kernel52(i8* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
%gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16
%x = load i8, i8 addrspace(4)* %gep2, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Hostcall pointer used inside a function call
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel60
define amdgpu_kernel void @test_kernel60(i64* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
%x = load i64, i64 addrspace(4)* %cast
call void @function4(i64 %x, i64* %a)
ret void
}
; Hostcall pointer retrieved inside a function call; chain of geps
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel61
define amdgpu_kernel void @test_kernel61(i64* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
call void @function5(i8 addrspace(4)* %gep, i64* %a)
ret void
}
; Pointer captured
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel70
define amdgpu_kernel void @test_kernel70(i8 addrspace(4)* addrspace(1)* %sink) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink, align 8
ret void
}
; Pointer captured inside function call
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel71
define amdgpu_kernel void @test_kernel71(i8 addrspace(4)* addrspace(1)* %sink) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
call void @function3(i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink)
ret void
}
; Ineffective pointer capture
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel72
define amdgpu_kernel void @test_kernel72() #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* undef, align 8
ret void
}
attributes #0 = { "amdgpu-no-hostcall-ptr" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" }
attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" }
attributes #4 = { noinline }

View File

@ -0,0 +1,301 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s
declare void @function1()
declare void @function2() #0
; Function Attrs: noinline
define void @function3(i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink) #2 {
store i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink, align 8
ret void
}
; Function Attrs: noinline
define void @function4(i64 %arg, i64* %a) #2 {
store i64 %arg, i64* %a
ret void
}
; Function Attrs: noinline
define void @function5(i8 addrspace(4)* %ptr, i64* %sink) #2 {
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 64
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
%x = load i64, i64 addrspace(4)* %cast
store i64 %x, i64* %sink
ret void
}
; Function Attrs: nounwind readnone speculatable willreturn
declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
; CHECK: amdhsa.kernels:
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel10
define amdgpu_kernel void @test_kernel10(i8* %a) {
store i8 3, i8* %a, align 1
ret void
}
; Call to an extern function
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel20
define amdgpu_kernel void @test_kernel20(i8* %a) {
call void @function1()
store i8 3, i8* %a, align 1
ret void
}
; Explicit attribute on kernel
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel21
define amdgpu_kernel void @test_kernel21(i8* %a) #0 {
call void @function1()
store i8 3, i8* %a, align 1
ret void
}
; Explicit attribute on extern callee
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel22
define amdgpu_kernel void @test_kernel22(i8* %a) {
call void @function2()
store i8 3, i8* %a, align 1
ret void
}
; Access more bytes than the pointer size
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel30
define amdgpu_kernel void @test_kernel30(i128* %a) {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 72
%cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)*
%x = load i128, i128 addrspace(4)* %cast
store i128 %x, i128* %a
ret void
}
; Typical load of hostcall buffer pointer
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel40
define amdgpu_kernel void @test_kernel40(i64* %a) {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
%x = load i64, i64 addrspace(4)* %cast
store i64 %x, i64* %a
ret void
}
; Typical usage, overriden by explicit attribute on kernel
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel41
define amdgpu_kernel void @test_kernel41(i64* %a) #0 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
%x = load i64, i64 addrspace(4)* %cast
store i64 %x, i64* %a
ret void
}
; Access to implicit arg before the hostcall pointer
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel42
define amdgpu_kernel void @test_kernel42(i64* %a) {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 72
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
%x = load i64, i64 addrspace(4)* %cast
store i64 %x, i64* %a
ret void
}
; Access to implicit arg after the hostcall pointer
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel43
define amdgpu_kernel void @test_kernel43(i64* %a) {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 88
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
%x = load i64, i64 addrspace(4)* %cast
store i64 %x, i64* %a
ret void
}
; Accessing a byte just before the hostcall pointer
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel44
define amdgpu_kernel void @test_kernel44(i8* %a) {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 79
%x = load i8, i8 addrspace(4)* %gep, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Accessing a byte inside the hostcall pointer
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel45
define amdgpu_kernel void @test_kernel45(i8* %a) {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
%x = load i8, i8 addrspace(4)* %gep, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Accessing a byte inside the hostcall pointer
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel46
define amdgpu_kernel void @test_kernel46(i8* %a) {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 87
%x = load i8, i8 addrspace(4)* %gep, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Accessing a byte just after the hostcall pointer
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel47
define amdgpu_kernel void @test_kernel47(i8* %a) {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 88
%x = load i8, i8 addrspace(4)* %gep, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Access with an unknown offset
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel50
define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b
%x = load i8, i8 addrspace(4)* %gep, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Multiple geps reaching the hostcall pointer argument.
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel51
define amdgpu_kernel void @test_kernel51(i8* %a) {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
%gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 64
%x = load i8, i8 addrspace(4)* %gep2, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Multiple geps not reaching the hostcall pointer argument.
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel52
define amdgpu_kernel void @test_kernel52(i8* %a) {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
%gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16
%x = load i8, i8 addrspace(4)* %gep2, align 1
store i8 %x, i8* %a, align 1
ret void
}
; Hostcall pointer used inside a function call
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel60
define amdgpu_kernel void @test_kernel60(i64* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
%x = load i64, i64 addrspace(4)* %cast
call void @function4(i64 %x, i64* %a)
ret void
}
; Hostcall pointer retrieved inside a function call; chain of geps
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel61
define amdgpu_kernel void @test_kernel61(i64* %a) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
call void @function5(i8 addrspace(4)* %gep, i64* %a)
ret void
}
; Pointer captured
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel70
define amdgpu_kernel void @test_kernel70(i8 addrspace(4)* addrspace(1)* %sink) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink, align 8
ret void
}
; Pointer captured inside function call
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel71
define amdgpu_kernel void @test_kernel71(i8 addrspace(4)* addrspace(1)* %sink) #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
call void @function3(i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink)
ret void
}
; Ineffective pointer capture
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel72
define amdgpu_kernel void @test_kernel72() #2 {
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* undef, align 8
ret void
}
attributes #0 = { "amdgpu-no-hostcall-ptr" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { noinline }

View File

@ -202,13 +202,13 @@ attributes #5 = { "amdgpu-flat-work-group-size"="128,512" }
attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.

View File

@ -73,6 +73,6 @@ define amdgpu_kernel void @test_simple_indirect_call() {
;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.

View File

@ -31,5 +31,5 @@ define amdgpu_kernel void @kernel1() #1 {
attributes #0 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.

View File

@ -97,6 +97,6 @@ define amdgpu_kernel void @kernel2() #0 {
attributes #0 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.

View File

@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel3() #2 {
attributes #2 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.

View File

@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel2() #2 {
attributes #1 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.

View File

@ -101,7 +101,7 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
attributes #0 = { nounwind readnone }
attributes #1 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.

View File

@ -61,5 +61,5 @@ define amdgpu_kernel void @kernel3() #0 {
attributes #0 = { "uniform-work-group-size"="false" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.

View File

@ -307,11 +307,6 @@ SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
}
}
// Set amdgpu_hostcall if host calls have been linked, as needed by newer LLVM
// FIXME: Is there a way to set this during printf() lowering that makes sense
if (ret->getFunction("__ockl_hostcall_internal"))
if (!ret->getModuleFlag("amdgpu_hostcall"))
ret->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
return ret;
}