[AMDGPU] Add metadata for runtime

Emit runtime metadata into an ELF section.

The runtime requires certain information (metadata) about kernels in order to execute and query them. This information is emitted into an ELF section as a stream of key-value pairs.

Differential Revision: https://reviews.llvm.org/D21849

llvm-svn: 275566
This commit is contained in:
Yaxun Liu 2016-07-15 14:58:21 +00:00
parent e4725437e8
commit b3d17690eb
4 changed files with 1219 additions and 0 deletions

View File

@ -39,7 +39,9 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "AMDGPURuntimeMetadata.h"
using namespace ::AMDGPU;
using namespace llvm;
// TODO: This should get the default rounding mode from the kernel. We just set
@ -111,6 +113,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
"AMD", "AMDGPU");
emitStartOfRuntimeMetadata(M);
}
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
@ -244,6 +247,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
}
emitRuntimeMetadata(*MF.getFunction());
return false;
}
@ -740,3 +745,227 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
*TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
return false;
}
// Emit a key and an integer value for runtime metadata.
static void emitRuntimeMDIntValue(std::unique_ptr<MCStreamer> &Streamer,
RuntimeMD::Key K, uint64_t V,
unsigned Size) {
Streamer->EmitIntValue(K, 1);
Streamer->EmitIntValue(V, Size);
}
// Emit a key and a string value for runtime metadata.
static void emitRuntimeMDStringValue(std::unique_ptr<MCStreamer> &Streamer,
RuntimeMD::Key K, StringRef S) {
Streamer->EmitIntValue(K, 1);
Streamer->EmitIntValue(S.size(), 4);
Streamer->EmitBytes(S);
}
/// Write the key \p K as a single byte followed by three integers, each
/// encoded in \p Size bytes. The integers are the first three operands of
/// \p Node, which must each be a ConstantInt wrapped as metadata.
static void emitRuntimeMDThreeIntValues(std::unique_ptr<MCStreamer> &Streamer,
                                        RuntimeMD::Key K, MDNode *Node,
                                        unsigned Size) {
  Streamer->EmitIntValue(K, 1);
  for (unsigned Idx = 0; Idx != 3; ++Idx) {
    const uint64_t Component =
        mdconst::extract<ConstantInt>(Node->getOperand(Idx))->getZExtValue();
    Streamer->EmitIntValue(Component, Size);
  }
}
void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
OutStreamer->SwitchSection(getObjFileLowering().getContext()
.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion,
RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
RuntimeMD::OpenCL_C, 1);
auto Node = MD->getOperand(0);
unsigned short Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
->getZExtValue();
unsigned short Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
->getZExtValue();
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
Major * 100 + Minor * 10, 2);
}
}
static Twine getOCLTypeName(Type *Ty, bool isSigned) {
if (VectorType* VecTy = dyn_cast<VectorType>(Ty)) {
Type* EleTy = VecTy->getElementType();
unsigned Size = VecTy->getVectorNumElements();
return getOCLTypeName(EleTy, isSigned) + Twine(Size);
}
switch (Ty->getTypeID()) {
case Type::HalfTyID: return "half";
case Type::FloatTyID: return "float";
case Type::DoubleTyID: return "double";
case Type::IntegerTyID: {
if (!isSigned)
return Twine('u') + getOCLTypeName(Ty, true);
auto IntTy = cast<IntegerType>(Ty);
auto BW = IntTy->getIntegerBitWidth();
switch (BW) {
case 8:
return "char";
case 16:
return "short";
case 32:
return "int";
case 64:
return "long";
default:
return Twine("i") + Twine(BW);
}
}
default:
llvm_unreachable("invalid type");
}
}
static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
Type *Ty, StringRef TypeName) {
if (auto VT = dyn_cast<VectorType>(Ty))
return getRuntimeMDValueType(VT->getElementType(), TypeName);
else if (auto PT = dyn_cast<PointerType>(Ty))
return getRuntimeMDValueType(PT->getElementType(), TypeName);
else if (Ty->isHalfTy())
return RuntimeMD::KernelArg::F16;
else if (Ty->isFloatTy())
return RuntimeMD::KernelArg::F32;
else if (Ty->isDoubleTy())
return RuntimeMD::KernelArg::F64;
else if (IntegerType* intTy = dyn_cast<IntegerType>(Ty)) {
bool Signed = !TypeName.startswith("u");
switch (intTy->getIntegerBitWidth()) {
case 8:
return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8;
case 16:
return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16;
case 32:
return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32;
case 64:
return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64;
default:
// Runtime does not recognize other integer types. Report as
// struct type.
return RuntimeMD::KernelArg::Struct;
}
} else
return RuntimeMD::KernelArg::Struct;
}
void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) {
if (!F.getMetadata("kernel_arg_type"))
return;
MCContext &Context = getObjFileLowering().getContext();
OutStreamer->SwitchSection(
Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyKernelName, F.getName());
for (auto &Arg:F.args()) {
// Emit KeyArgBegin.
unsigned I = Arg.getArgNo();
OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1);
// Emit KeyArgSize and KeyArgAlign.
auto T = Arg.getType();
auto DL = F.getParent()->getDataLayout();
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize,
DL.getTypeAllocSize(T), 4);
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign,
DL.getABITypeAlignment(T), 4);
// Emit KeyArgTypeName.
auto TypeName = dyn_cast<MDString>(F.getMetadata(
"kernel_arg_type")->getOperand(I))->getString();
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName);
// Emit KeyArgName.
if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) {
auto ArgName = cast<MDString>(ArgNameMD->getOperand(
I))->getString();
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName);
}
// Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
auto TypeQual = cast<MDString>(F.getMetadata(
"kernel_arg_type_qual")->getOperand(I))->getString();
SmallVector<StringRef, 1> SplitQ;
TypeQual.split(SplitQ, " ", -1, false/* drop empty entry*/);
for (auto &I:SplitQ) {
auto Key = StringSwitch<RuntimeMD::Key>(I)
.Case("volatile", RuntimeMD::KeyArgIsVolatile)
.Case("restrict", RuntimeMD::KeyArgIsRestrict)
.Case("const", RuntimeMD::KeyArgIsConst)
.Case("pipe", RuntimeMD::KeyArgIsPipe)
.Default(RuntimeMD::KeyNull);
OutStreamer->EmitIntValue(Key, 1);
}
// Emit KeyArgTypeKind.
auto BaseTypeName = cast<MDString>(
F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString();
auto TypeKind = StringSwitch<RuntimeMD::KernelArg::TypeKind>(BaseTypeName)
.Case("sampler_t", RuntimeMD::KernelArg::Sampler)
.Case("queue_t", RuntimeMD::KernelArg::Queue)
.Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
"image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image)
.Cases("image2d_depth_t", "image2d_array_depth_t",
"image2d_msaa_t", "image2d_array_msaa_t",
"image2d_msaa_depth_t", RuntimeMD::KernelArg::Image)
.Cases("image2d_array_msaa_depth_t", "image3d_t",
RuntimeMD::KernelArg::Image)
.Default(isa<PointerType>(T) ? RuntimeMD::KernelArg::Pointer :
RuntimeMD::KernelArg::Value);
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1);
// Emit KeyArgValueType.
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType,
getRuntimeMDValueType(T, BaseTypeName), 2);
// Emit KeyArgAccQual.
auto AccQual = cast<MDString>(F.getMetadata(
"kernel_arg_access_qual")->getOperand(I))->getString();
auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
.Case("read_only", RuntimeMD::KernelArg::ReadOnly)
.Case("write_only", RuntimeMD::KernelArg::WriteOnly)
.Case("read_write", RuntimeMD::KernelArg::ReadWrite)
.Default(RuntimeMD::KernelArg::None);
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual,
AQ, 1);
// Emit KeyArgAddrQual.
if (isa<PointerType>(T))
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual,
T->getPointerAddressSpace(), 1);
// Emit KeyArgEnd
OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1);
}
// Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
if (auto RWGS = F.getMetadata("reqd_work_group_size"))
emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyReqdWorkGroupSize,
RWGS, 4);
if (auto WGSH = F.getMetadata("work_group_size_hint"))
emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyWorkGroupSizeHint,
WGSH, 4);
if (auto VTH = F.getMetadata("vec_type_hint")) {
auto TypeName = getOCLTypeName(cast<ValueAsMetadata>(
VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
VTH->getOperand(1))->getZExtValue()).str();
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyVecTypeHint,
TypeName);
}
// Emit KeyKernelEnd
OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
}

View File

@ -127,6 +127,10 @@ public:
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) override;
/// Emit the module-level part of the runtime metadata (metadata version and,
/// for OpenCL modules, language and language version) into the runtime
/// metadata ELF section.
void emitStartOfRuntimeMetadata(const Module &M);
/// Emit the kernel-level runtime metadata (kernel name, per-argument records
/// and work-group / vector-type hints) for \p F. Does nothing when \p F has
/// no "kernel_arg_type" metadata.
void emitRuntimeMetadata(const Function &F);
protected:
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;

View File

@ -0,0 +1,138 @@
//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// Enums and structure types used by runtime metadata.
///
/// Runtime requests certain information (metadata) about kernels to be able
/// to execute the kernels and answer the queries about the kernels.
/// The metadata is represented as a byte stream in an ELF section of a
/// binary (code object). The byte stream consists of key-value pairs.
/// Each key is an 8 bit unsigned integer. Each value can be an integer,
/// a string, or a stream of key-value pairs. There are 3 levels of key-value
/// pair streams. At the beginning of the ELF section is the top level
/// key-value pair stream. A kernel-level key-value pair stream starts after
/// encountering KeyKernelBegin and ends immediately before encountering
/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts
/// after encountering KeyArgBegin and ends immediately before encountering
/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top
/// level key-value pair stream. A kernel-argument-level key-value pair stream
/// can only appear in a kernel-level key-value pair stream.
///
/// The format should be kept backward compatible. New enum values and bit
/// fields should be appended at the end. It is suggested to bump up the
/// revision number whenever the format changes and document the change
/// in the revision in this header.
///
//
//===----------------------------------------------------------------------===//
//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
#include <stdint.h>
namespace AMDGPU {

namespace RuntimeMD {

  // Version and revision of runtime metadata. They are emitted together as
  // one 16-bit value: version in the high byte, revision in the low byte.
  const unsigned char MDVersion   = 1;
  const unsigned char MDRevision  = 0;

  // ELF section name containing runtime metadata
  const char SectionName[] = ".AMDGPU.runtime_metadata";

  // Enumeration values of keys in runtime metadata.
  //
  // Every key is written to the stream as a single byte, so the underlying
  // type is fixed to 8 bits (matching the other enums in this header).
  // Values are part of the binary format: append new keys, never renumber.
  enum Key : uint8_t {
    KeyNull                     = 0, // Place holder. Ignored when encountered
    KeyMDVersion                = 1, // Runtime metadata version
    KeyLanguage                 = 2, // Language
    KeyLanguageVersion          = 3, // Language version
    KeyKernelBegin              = 4, // Beginning of kernel-level stream
    KeyKernelEnd                = 5, // End of kernel-level stream
    KeyKernelName               = 6, // Kernel name
    KeyArgBegin                 = 7, // Beginning of kernel-arg-level stream
    KeyArgEnd                   = 8, // End of kernel-arg-level stream
    KeyArgSize                  = 9, // Kernel arg size
    KeyArgAlign                 = 10, // Kernel arg alignment
    KeyArgTypeName              = 11, // Kernel argument type name
    KeyArgName                  = 12, // Kernel argument name
    KeyArgTypeKind              = 13, // Kernel argument type kind
    KeyArgValueType             = 14, // Kernel argument value type
    KeyArgAddrQual              = 15, // Kernel argument address qualifier
    KeyArgAccQual               = 16, // Kernel argument access qualifier
    KeyArgIsConst               = 17, // Kernel argument is const qualified
    KeyArgIsRestrict            = 18, // Kernel argument is restrict qualified
    KeyArgIsVolatile            = 19, // Kernel argument is volatile qualified
    KeyArgIsPipe                = 20, // Kernel argument is pipe qualified
    KeyReqdWorkGroupSize        = 21, // Required work group size
    KeyWorkGroupSizeHint        = 22, // Work group size hint
    KeyVecTypeHint              = 23, // Vector type hint
    KeyKernelIndex              = 24, // Kernel index for device enqueue
    KeySGPRs                    = 25, // Number of SGPRs
    KeyVGPRs                    = 26, // Number of VGPRs
    KeyMinWavesPerSIMD          = 27, // Minimum number of waves per SIMD
    KeyMaxWavesPerSIMD          = 28, // Maximum number of waves per SIMD
    KeyFlatWorkGroupSizeLimits  = 29, // Flat work group size limits
    KeyMaxWorkGroupSize         = 30, // Maximum work group size
    KeyNoPartialWorkGroups      = 31, // No partial work groups
  };

  // Source language of the module; value of KeyLanguage.
  enum Language : uint8_t {
    OpenCL_C      = 0,
    HCC           = 1,
    OpenMP        = 2,
    OpenCL_CPP    = 3,
  };

  // Language version; value of KeyLanguageVersion. The numeric value is
  // major * 100 + minor * 10 (e.g. V120 is version 1.2).
  enum LanguageVersion : uint16_t {
    V100          = 100,
    V110          = 110,
    V120          = 120,
    V200          = 200,
    V210          = 210,
  };

  namespace KernelArg {
    // Broad category of a kernel argument; value of KeyArgTypeKind.
    enum TypeKind : uint8_t {
      Value     = 0,
      Pointer   = 1,
      Image     = 2,
      Sampler   = 3,
      Queue     = 4,
    };

    // Scalar element type of a kernel argument; value of KeyArgValueType.
    // Struct doubles as the catch-all for anything not listed here.
    enum ValueType : uint16_t {
      Struct  = 0,
      I8      = 1,
      U8      = 2,
      I16     = 3,
      U16     = 4,
      F16     = 5,
      I32     = 6,
      U32     = 7,
      F32     = 8,
      I64     = 9,
      U64     = 10,
      F64     = 11,
    };

    // Access qualifier of a kernel argument; value of KeyArgAccQual.
    // NOTE(review): the name is misspelled ("Qualifer"); it is kept as-is
    // because existing code refers to it by this spelling.
    enum AccessQualifer : uint8_t {
      None       = 0,
      ReadOnly   = 1,
      WriteOnly  = 2,
      ReadWrite  = 3,
    };
  } // namespace KernelArg
} // namespace RuntimeMD
} // namespace AMDGPU
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H

View File

@ -0,0 +1,848 @@
; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
%struct.A = type { i8, float }
%opencl.image1d_t = type opaque
%opencl.image2d_t = type opaque
%opencl.image3d_t = type opaque
%opencl.queue_t = type opaque
%opencl.pipe_t = type opaque
%struct.B = type { i32 addrspace(1)*}
%opencl.clk_event_t = type opaque
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .short 256
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .short 200
; CHECK-LABEL:{{^}}test_char:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "test_char"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 1
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 1
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 4
; CHECK-NEXT: .ascii "char"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 1
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One i8 argument passed by value. The expectations above are size 1,
; alignment 1, type kind 0 (Value) and value type 1 (I8).
define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_ushort2:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 12
; CHECK-NEXT: .ascii "test_ushort2"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 7
; CHECK-NEXT: .ascii "ushort2"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 4
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One <2 x i16> argument. The expected value type is 4 (U16): the element
; type of the vector, reported as unsigned for the "ushort2" type name.
define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_int3:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "test_int3"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 4
; CHECK-NEXT: .ascii "int3"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One <3 x i32> argument. The expectations above are size 16 and alignment
; 16 for the three-element vector, and value type 6 (I32).
define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_ulong4:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 11
; CHECK-NEXT: .ascii "test_ulong4"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 32
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 32
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 6
; CHECK-NEXT: .ascii "ulong4"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 10
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One <4 x i64> argument. Expected size and alignment are 32; the expected
; value type is 10 (U64).
define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_half8:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 10
; CHECK-NEXT: .ascii "test_half8"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "half8"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 5
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One <8 x half> argument. Expected size and alignment are 16; the expected
; value type is 5 (F16).
define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_float16:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 12
; CHECK-NEXT: .ascii "test_float16"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 64
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 64
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 7
; CHECK-NEXT: .ascii "float16"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 8
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One <16 x float> argument. Expected size and alignment are 64; the expected
; value type is 8 (F32).
define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_double16:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 13
; CHECK-NEXT: .ascii "test_double16"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 128
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 128
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 8
; CHECK-NEXT: .ascii "double16"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 11
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One <16 x double> argument. Expected size and alignment are 128; the
; expected value type is 11 (F64).
define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_pointer:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 12
; CHECK-NEXT: .ascii "test_pointer"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One pointer argument in address space 1. The expectations above are type
; kind 1 (Pointer), value type 6 (I32, taken from the pointee) and an
; address qualifier record (key 15) with value 1.
define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_image:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 10
; CHECK-NEXT: .ascii "test_image"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "image2d_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One pointer to the opaque %opencl.image2d_t type. The expectations above
; are type kind 2 (Image) and value type 0 (Struct).
define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_sampler:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 12
; CHECK-NEXT: .ascii "test_sampler"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "sampler_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One i32 argument whose expected type name is "sampler_t". The expected type
; kind is 3 (Sampler) while the value type stays 6 (I32), and no address
; qualifier record is expected because the argument is not a pointer.
define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_queue:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 10
; CHECK-NEXT: .ascii "test_queue"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 7
; CHECK-NEXT: .ascii "queue_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One pointer to the opaque %opencl.queue_t type. The expectations above are
; type kind 4 (Queue) and value type 0 (Struct).
define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_struct:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 11
; CHECK-NEXT: .ascii "test_struct"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 8
; CHECK-NEXT: .ascii "struct A"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One struct argument passed as a byval pointer in the default address space.
; The expectations above are type kind 1 (Pointer), address qualifier 0 and
; size/alignment 4.
; NOTE(review): size/alignment 4 looks like the pointer, not %struct.A
; { i8, float } - confirm that is intended.
define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_i128:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "test_i128"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 4
; CHECK-NEXT: .ascii "i128"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; One i128 argument. Integer widths other than 8/16/32/64 are expected to be
; reported with value type 0 (Struct); expected size is 16, alignment 8.
define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_multi_arg:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 14
; CHECK-NEXT: .ascii "test_multi_arg"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 3
; CHECK-NEXT: .ascii "int"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 6
; CHECK-NEXT: .ascii "short2"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 3
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "char3"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 1
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; Three by-value arguments (i32, <2 x i16>, <3 x i8>). The expectations above
; contain one begin/end argument record (keys 7 .. 8) per argument, in
; argument order, inside a single kernel record.
define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
ret void
}
; CHECK-LABEL:{{^}}test_addr_space:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 15
; CHECK-NEXT: .ascii "test_addr_space"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; Three i32 pointers in address spaces 1, 2 and 3. The expected address
; qualifier values (key 15) are 1, 2 and 3 respectively; the address space 3
; pointer is expected to have size and alignment 4 rather than 8.
define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
ret void
}
; CHECK-LABEL:{{^}}test_type_qual:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 14
; CHECK-NEXT: .ascii "test_type_qual"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 19
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 17
; CHECK-NEXT: .byte 18
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 20
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; Three pointer arguments exercising type qualifiers. The expectations above
; contain the bare qualifier keys 19 (volatile) for the first argument,
; 17 and 18 (const, restrict) for the second, and 20 (pipe) for the third.
define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
ret void
}
; CHECK-LABEL:{{^}}test_access_qual:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 16
; CHECK-NEXT: .ascii "test_access_qual"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "image1d_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "image2d_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "image3d_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
; Three image arguments exercising access qualifiers. The expected access
; qualifier values (key 16) are 1 (ReadOnly), 2 (WriteOnly) and 3 (ReadWrite)
; for %ro, %wo and %rw respectively.
define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
ret void
}
; CHECK-LABEL:{{^}}test_reqd_wgs_vec_type_hint:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 27
; CHECK-NEXT: .ascii "test_reqd_wgs_vec_type_hint"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 3
; CHECK-NEXT: .ascii "int"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 21
; CHECK-NEXT: .long 1
; CHECK-NEXT: .long 2
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 23
; CHECK-NEXT: .long 3
; CHECK-NEXT: .ascii "int"
; CHECK-NEXT: .byte 5
; Kernel with !reqd_work_group_size and !vec_type_hint attachments. After the
; argument record, the expectations above contain key 21 followed by three
; 32-bit values (1, 2, 4) and key 23 with the string "int".
define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 {
ret void
}
; test_wgs_hint_vec_type_hint: kernel with a single i32 argument that also
; carries !vec_type_hint (!7, a <4 x i32> hint) and !work_group_size_hint
; (!8) attachments. After the argument entries, the expected stream holds
; key 22 followed by the three 4-byte values 8, 16, 32 (the operands of !8),
; then key 23 followed by the length-prefixed string "uint4".
; NOTE(review): the second operand of !7 is i32 0 and the expected name is
; unsigned ("uint4"), whereas !5 uses i32 1 and yields "int" - presumably
; that operand is a signedness flag; confirm against the emitter.
; CHECK-LABEL:{{^}}test_wgs_hint_vec_type_hint:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 27
; CHECK-NEXT: .ascii "test_wgs_hint_vec_type_hint"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 3
; CHECK-NEXT: .ascii "int"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 22
; CHECK-NEXT: .long 8
; CHECK-NEXT: .long 16
; CHECK-NEXT: .long 32
; CHECK-NEXT: .byte 23
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "uint4"
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 {
ret void
}
; test_arg_ptr_to_ptr: kernel whose only argument is an addrspace(1) pointer
; to an i32 pointer, described by the type string "int **" (!80) and address
; space 1 (!81). The expected stream reports 8 for both keys 9 and 10,
; the 6-character type name under key 11, and a key 15 entry with value 1.
; NOTE(review): keys 9/10 presumably encode argument size and alignment and
; key 15 the address qualifier - confirm against AMDGPURuntimeMetadata.h.
; CHECK-LABEL:{{^}}test_arg_ptr_to_ptr:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 19
; CHECK-NEXT: .ascii "test_arg_ptr_to_ptr"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 6
; CHECK-NEXT: .ascii "int **"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
ret void
}
; test_arg_struct_contains_ptr: kernel whose only argument is a byval
; pointer to %struct.B, described by the type string "struct B" (!82) and
; address space 0 (!1). The expected stream reports 4 for both keys 9 and
; 10, the 8-character type name under key 11, a 2-byte value of 0 under
; key 14, and a key 15 entry with value 0.
; NOTE(review): the layout of %struct.B is defined outside this part of the
; file, so the expected size/alignment values cannot be cross-checked here.
; CHECK-LABEL:{{^}}test_arg_struct_contains_ptr:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 28
; CHECK-NEXT: .ascii "test_arg_struct_contains_ptr"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 8
; CHECK-NEXT: .ascii "struct B"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
ret void
}
; test_arg_vector_of_ptr: kernel whose only argument is a two-element vector
; of addrspace(1) i32 pointers, passed by value. Its type string (!83) is
; the 47-character "global int* __attribute__((ext_vector_type(2)))" and its
; address-space metadata is 0 (!1). The expected stream reports 16 for both
; keys 9 and 10 and, unlike the pointer-argument tests in this file,
; contains no key 15 entry.
; CHECK-LABEL:{{^}}test_arg_vector_of_ptr:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 22
; CHECK-NEXT: .ascii "test_arg_vector_of_ptr"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 47
; CHECK-NEXT: .ascii "global int* __attribute__((ext_vector_type(2)))"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
ret void
}
; test_arg_unknown_builtin_type: kernel whose only argument is an
; addrspace(1) pointer to the opaque type %opencl.clk_event_t, described by
; the type string "clk_event_t" (!84) and address space 1 (!81). The
; expected stream reports 8 for both keys 9 and 10, a 2-byte value of 0
; under key 14, and a key 15 entry with value 1.
; NOTE(review): judging by the test name, clk_event_t is meant to exercise
; the fallback path for builtin types the emitter does not special-case -
; confirm against the emitter.
; CHECK-LABEL:{{^}}test_arg_unknown_builtin_type:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 29
; CHECK-NEXT: .ascii "test_arg_unknown_builtin_type"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 11
; CHECK-NEXT: .ascii "clk_event_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
ret void
}
; Metadata nodes referenced by the kernel definitions in this file.

; Single-argument defaults: address space 0, access qualifier "none",
; type name "int" and an empty type qualifier.
!1 = !{i32 0}
!2 = !{!"none"}
!3 = !{!"int"}
!4 = !{!""}
; Operands for !vec_type_hint (!5, !7), !reqd_work_group_size (!6) and
; !work_group_size_hint (!8).
!5 = !{i32 undef, i32 1}
!6 = !{i32 1, i32 2, i32 4}
!7 = !{<4 x i32> undef, i32 0}
!8 = !{i32 8, i32 16, i32 32}
; Single type-name strings; presumably !kernel_arg_type operands for kernels
; defined earlier in the file - verify against those definitions.
!9 = !{!"char"}
!10 = !{!"ushort2"}
!11 = !{!"int3"}
!12 = !{!"ulong4"}
!13 = !{!"half8"}
!14 = !{!"float16"}
!15 = !{!"double16"}
!16 = !{!"int *"}
!17 = !{!"image2d_t"}
!18 = !{!"sampler_t"}
!19 = !{!"queue_t"}
!20 = !{!"struct A"}
!21 = !{!"i128"}
; Three-element tuples for kernels with three arguments.
!22 = !{i32 0, i32 0, i32 0}
!23 = !{!"none", !"none", !"none"}
!24 = !{!"int", !"short2", !"char3"}
!25 = !{!"", !"", !""}
!50 = !{i32 1, i32 2, i32 3}
!51 = !{!"int *", !"int *", !"int *"}
; Address spaces, access qualifiers and image type names used by
; @test_access_qual.
!60 = !{i32 1, i32 1, i32 1}
!61 = !{!"read_only", !"write_only", !"read_write"}
!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
!70 = !{!"volatile", !"const restrict", !"pipe"}
; Type names and address space used by the pointer-to-pointer, struct,
; vector-of-pointer and clk_event_t argument tests.
!80 = !{!"int **"}
!81 = !{i32 1}
!82 = !{!"struct B"}
!83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
!84 = !{!"clk_event_t"}
; Module-level named metadata carrying the pair (2, 0); presumably the
; OpenCL language version 2.0.
!opencl.ocl.version = !{!90}
!90 = !{i32 2, i32 0}