forked from OSchip/llvm-project
[AMDGPU] Add metadata for runtime
Added emitting metadata to elf for runtime. Runtime requires certain information (metadata) about kernels to be able to execute and query them. Such information is emitted to an elf section as a key-value pair stream. Differential Revision: https://reviews.llvm.org/D21849 llvm-svn: 275566
This commit is contained in:
parent
e4725437e8
commit
b3d17690eb
|
@ -39,7 +39,9 @@
|
|||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Target/TargetLoweringObjectFile.h"
|
||||
#include "AMDGPURuntimeMetadata.h"
|
||||
|
||||
using namespace ::AMDGPU;
|
||||
using namespace llvm;
|
||||
|
||||
// TODO: This should get the default rounding mode from the kernel. We just set
|
||||
|
@ -111,6 +113,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
|
|||
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
|
||||
TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
|
||||
"AMD", "AMDGPU");
|
||||
emitStartOfRuntimeMetadata(M);
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
||||
|
@ -244,6 +247,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
|||
}
|
||||
}
|
||||
|
||||
emitRuntimeMetadata(*MF.getFunction());
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -740,3 +745,227 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
|
|||
*TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Emit a key and an integer value for runtime metadata.
|
||||
static void emitRuntimeMDIntValue(std::unique_ptr<MCStreamer> &Streamer,
|
||||
RuntimeMD::Key K, uint64_t V,
|
||||
unsigned Size) {
|
||||
Streamer->EmitIntValue(K, 1);
|
||||
Streamer->EmitIntValue(V, Size);
|
||||
}
|
||||
|
||||
// Emit a key and a string value for runtime metadata.
|
||||
static void emitRuntimeMDStringValue(std::unique_ptr<MCStreamer> &Streamer,
|
||||
RuntimeMD::Key K, StringRef S) {
|
||||
Streamer->EmitIntValue(K, 1);
|
||||
Streamer->EmitIntValue(S.size(), 4);
|
||||
Streamer->EmitBytes(S);
|
||||
}
|
||||
|
||||
// Emit a key and three integer values for runtime metadata.
|
||||
// The three integer values are obtained from MDNode \p Node;
|
||||
static void emitRuntimeMDThreeIntValues(std::unique_ptr<MCStreamer> &Streamer,
|
||||
RuntimeMD::Key K, MDNode *Node,
|
||||
unsigned Size) {
|
||||
Streamer->EmitIntValue(K, 1);
|
||||
Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
|
||||
Node->getOperand(0))->getZExtValue(), Size);
|
||||
Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
|
||||
Node->getOperand(1))->getZExtValue(), Size);
|
||||
Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
|
||||
Node->getOperand(2))->getZExtValue(), Size);
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
|
||||
OutStreamer->SwitchSection(getObjFileLowering().getContext()
|
||||
.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
|
||||
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion,
|
||||
RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
|
||||
if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
|
||||
RuntimeMD::OpenCL_C, 1);
|
||||
auto Node = MD->getOperand(0);
|
||||
unsigned short Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
|
||||
->getZExtValue();
|
||||
unsigned short Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
|
||||
->getZExtValue();
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
|
||||
Major * 100 + Minor * 10, 2);
|
||||
}
|
||||
}
|
||||
|
||||
static Twine getOCLTypeName(Type *Ty, bool isSigned) {
|
||||
if (VectorType* VecTy = dyn_cast<VectorType>(Ty)) {
|
||||
Type* EleTy = VecTy->getElementType();
|
||||
unsigned Size = VecTy->getVectorNumElements();
|
||||
return getOCLTypeName(EleTy, isSigned) + Twine(Size);
|
||||
}
|
||||
switch (Ty->getTypeID()) {
|
||||
case Type::HalfTyID: return "half";
|
||||
case Type::FloatTyID: return "float";
|
||||
case Type::DoubleTyID: return "double";
|
||||
case Type::IntegerTyID: {
|
||||
if (!isSigned)
|
||||
return Twine('u') + getOCLTypeName(Ty, true);
|
||||
auto IntTy = cast<IntegerType>(Ty);
|
||||
auto BW = IntTy->getIntegerBitWidth();
|
||||
switch (BW) {
|
||||
case 8:
|
||||
return "char";
|
||||
case 16:
|
||||
return "short";
|
||||
case 32:
|
||||
return "int";
|
||||
case 64:
|
||||
return "long";
|
||||
default:
|
||||
return Twine("i") + Twine(BW);
|
||||
}
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("invalid type");
|
||||
}
|
||||
}
|
||||
|
||||
static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
|
||||
Type *Ty, StringRef TypeName) {
|
||||
if (auto VT = dyn_cast<VectorType>(Ty))
|
||||
return getRuntimeMDValueType(VT->getElementType(), TypeName);
|
||||
else if (auto PT = dyn_cast<PointerType>(Ty))
|
||||
return getRuntimeMDValueType(PT->getElementType(), TypeName);
|
||||
else if (Ty->isHalfTy())
|
||||
return RuntimeMD::KernelArg::F16;
|
||||
else if (Ty->isFloatTy())
|
||||
return RuntimeMD::KernelArg::F32;
|
||||
else if (Ty->isDoubleTy())
|
||||
return RuntimeMD::KernelArg::F64;
|
||||
else if (IntegerType* intTy = dyn_cast<IntegerType>(Ty)) {
|
||||
bool Signed = !TypeName.startswith("u");
|
||||
switch (intTy->getIntegerBitWidth()) {
|
||||
case 8:
|
||||
return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8;
|
||||
case 16:
|
||||
return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16;
|
||||
case 32:
|
||||
return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32;
|
||||
case 64:
|
||||
return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64;
|
||||
default:
|
||||
// Runtime does not recognize other integer types. Report as
|
||||
// struct type.
|
||||
return RuntimeMD::KernelArg::Struct;
|
||||
}
|
||||
} else
|
||||
return RuntimeMD::KernelArg::Struct;
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) {
|
||||
if (!F.getMetadata("kernel_arg_type"))
|
||||
return;
|
||||
|
||||
MCContext &Context = getObjFileLowering().getContext();
|
||||
OutStreamer->SwitchSection(
|
||||
Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
|
||||
OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
|
||||
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyKernelName, F.getName());
|
||||
|
||||
for (auto &Arg:F.args()) {
|
||||
// Emit KeyArgBegin.
|
||||
unsigned I = Arg.getArgNo();
|
||||
OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1);
|
||||
|
||||
// Emit KeyArgSize and KeyArgAlign.
|
||||
auto T = Arg.getType();
|
||||
auto DL = F.getParent()->getDataLayout();
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize,
|
||||
DL.getTypeAllocSize(T), 4);
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign,
|
||||
DL.getABITypeAlignment(T), 4);
|
||||
|
||||
// Emit KeyArgTypeName.
|
||||
auto TypeName = dyn_cast<MDString>(F.getMetadata(
|
||||
"kernel_arg_type")->getOperand(I))->getString();
|
||||
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName);
|
||||
|
||||
// Emit KeyArgName.
|
||||
if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) {
|
||||
auto ArgName = cast<MDString>(ArgNameMD->getOperand(
|
||||
I))->getString();
|
||||
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName);
|
||||
}
|
||||
|
||||
// Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
|
||||
auto TypeQual = cast<MDString>(F.getMetadata(
|
||||
"kernel_arg_type_qual")->getOperand(I))->getString();
|
||||
SmallVector<StringRef, 1> SplitQ;
|
||||
TypeQual.split(SplitQ, " ", -1, false/* drop empty entry*/);
|
||||
for (auto &I:SplitQ) {
|
||||
auto Key = StringSwitch<RuntimeMD::Key>(I)
|
||||
.Case("volatile", RuntimeMD::KeyArgIsVolatile)
|
||||
.Case("restrict", RuntimeMD::KeyArgIsRestrict)
|
||||
.Case("const", RuntimeMD::KeyArgIsConst)
|
||||
.Case("pipe", RuntimeMD::KeyArgIsPipe)
|
||||
.Default(RuntimeMD::KeyNull);
|
||||
OutStreamer->EmitIntValue(Key, 1);
|
||||
}
|
||||
|
||||
// Emit KeyArgTypeKind.
|
||||
auto BaseTypeName = cast<MDString>(
|
||||
F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString();
|
||||
auto TypeKind = StringSwitch<RuntimeMD::KernelArg::TypeKind>(BaseTypeName)
|
||||
.Case("sampler_t", RuntimeMD::KernelArg::Sampler)
|
||||
.Case("queue_t", RuntimeMD::KernelArg::Queue)
|
||||
.Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
|
||||
"image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image)
|
||||
.Cases("image2d_depth_t", "image2d_array_depth_t",
|
||||
"image2d_msaa_t", "image2d_array_msaa_t",
|
||||
"image2d_msaa_depth_t", RuntimeMD::KernelArg::Image)
|
||||
.Cases("image2d_array_msaa_depth_t", "image3d_t",
|
||||
RuntimeMD::KernelArg::Image)
|
||||
.Default(isa<PointerType>(T) ? RuntimeMD::KernelArg::Pointer :
|
||||
RuntimeMD::KernelArg::Value);
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1);
|
||||
|
||||
// Emit KeyArgValueType.
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType,
|
||||
getRuntimeMDValueType(T, BaseTypeName), 2);
|
||||
|
||||
// Emit KeyArgAccQual.
|
||||
auto AccQual = cast<MDString>(F.getMetadata(
|
||||
"kernel_arg_access_qual")->getOperand(I))->getString();
|
||||
auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
|
||||
.Case("read_only", RuntimeMD::KernelArg::ReadOnly)
|
||||
.Case("write_only", RuntimeMD::KernelArg::WriteOnly)
|
||||
.Case("read_write", RuntimeMD::KernelArg::ReadWrite)
|
||||
.Default(RuntimeMD::KernelArg::None);
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual,
|
||||
AQ, 1);
|
||||
|
||||
// Emit KeyArgAddrQual.
|
||||
if (isa<PointerType>(T))
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual,
|
||||
T->getPointerAddressSpace(), 1);
|
||||
|
||||
// Emit KeyArgEnd
|
||||
OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1);
|
||||
}
|
||||
|
||||
// Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
|
||||
if (auto RWGS = F.getMetadata("reqd_work_group_size"))
|
||||
emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyReqdWorkGroupSize,
|
||||
RWGS, 4);
|
||||
if (auto WGSH = F.getMetadata("work_group_size_hint"))
|
||||
emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyWorkGroupSizeHint,
|
||||
WGSH, 4);
|
||||
if (auto VTH = F.getMetadata("vec_type_hint")) {
|
||||
auto TypeName = getOCLTypeName(cast<ValueAsMetadata>(
|
||||
VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
|
||||
VTH->getOperand(1))->getZExtValue()).str();
|
||||
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyVecTypeHint,
|
||||
TypeName);
|
||||
}
|
||||
|
||||
// Emit KeyKernelEnd
|
||||
OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
|
||||
}
|
||||
|
|
|
@ -127,6 +127,10 @@ public:
|
|||
unsigned AsmVariant, const char *ExtraCode,
|
||||
raw_ostream &O) override;
|
||||
|
||||
void emitStartOfRuntimeMetadata(const Module &M);
|
||||
|
||||
void emitRuntimeMetadata(const Function &F);
|
||||
|
||||
protected:
|
||||
std::vector<std::string> DisasmLines, HexLines;
|
||||
size_t DisasmLineMaxLen;
|
||||
|
|
|
@ -0,0 +1,138 @@
|
|||
//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
///
|
||||
/// Enums and structure types used by runtime metadata.
|
||||
///
|
||||
/// Runtime requests certain information (metadata) about kernels to be able
|
||||
/// to execute the kernels and answer the queries about the kernels.
|
||||
/// The metadata is represented as a byte stream in an ELF section of a
|
||||
/// binary (code object). The byte stream consists of key-value pairs.
|
||||
/// Each key is an 8 bit unsigned integer. Each value can be an integer,
|
||||
/// a string, or a stream of key-value pairs. There are 3 levels of key-value
|
||||
/// pair streams. At the beginning of the ELF section is the top level
|
||||
/// key-value pair stream. A kernel-level key-value pair stream starts after
|
||||
/// encountering KeyKernelBegin and ends immediately before encountering
|
||||
/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts
|
||||
/// after encountering KeyArgBegin and ends immediately before encountering
|
||||
/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top
|
||||
/// level key-value pair stream. A kernel-argument-level key-value pair stream
|
||||
/// can only appear in a kernel-level key-value pair stream.
|
||||
///
|
||||
/// The format should be kept backward compatible. New enum values and bit
|
||||
/// fields should be appended at the end. It is suggested to bump up the
|
||||
/// revision number whenever the format changes and document the change
|
||||
/// in the revision in this header.
|
||||
///
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace AMDGPU {

namespace RuntimeMD {

  // Version and revision of runtime metadata
  const unsigned char MDVersion   = 1;
  const unsigned char MDRevision  = 0;

  // ELF section name containing runtime metadata
  const char SectionName[] = ".AMDGPU.runtime_metadata";

  // Enumeration values of keys in runtime metadata.
  // Keys are written to the stream as a single byte, so the enum is given an
  // 8-bit underlying type like the other enums in this header.
  enum Key : uint8_t {
    KeyNull                     = 0, // Place holder. Ignored when encountered
    KeyMDVersion                = 1, // Runtime metadata version
    KeyLanguage                 = 2, // Language
    KeyLanguageVersion          = 3, // Language version
    KeyKernelBegin              = 4, // Beginning of kernel-level stream
    KeyKernelEnd                = 5, // End of kernel-level stream
    KeyKernelName               = 6, // Kernel name
    KeyArgBegin                 = 7, // Beginning of kernel-arg-level stream
    KeyArgEnd                   = 8, // End of kernel-arg-level stream
    KeyArgSize                  = 9, // Kernel arg size
    KeyArgAlign                 = 10, // Kernel arg alignment
    KeyArgTypeName              = 11, // Kernel argument type name
    KeyArgName                  = 12, // Kernel argument name
    KeyArgTypeKind              = 13, // Kernel argument type kind
    KeyArgValueType             = 14, // Kernel argument value type
    KeyArgAddrQual              = 15, // Kernel argument address qualifier
    KeyArgAccQual               = 16, // Kernel argument access qualifier
    KeyArgIsConst               = 17, // Kernel argument is const qualified
    KeyArgIsRestrict            = 18, // Kernel argument is restrict qualified
    KeyArgIsVolatile            = 19, // Kernel argument is volatile qualified
    KeyArgIsPipe                = 20, // Kernel argument is pipe qualified
    KeyReqdWorkGroupSize        = 21, // Required work group size
    KeyWorkGroupSizeHint        = 22, // Work group size hint
    KeyVecTypeHint              = 23, // Vector type hint
    KeyKernelIndex              = 24, // Kernel index for device enqueue
    KeySGPRs                    = 25, // Number of SGPRs
    KeyVGPRs                    = 26, // Number of VGPRs
    KeyMinWavesPerSIMD          = 27, // Minimum number of waves per SIMD
    KeyMaxWavesPerSIMD          = 28, // Maximum number of waves per SIMD
    KeyFlatWorkGroupSizeLimits  = 29, // Flat work group size limits
    KeyMaxWorkGroupSize         = 30, // Maximum work group size
    KeyNoPartialWorkGroups      = 31, // No partial work groups
  };

  // Source language of the program.
  enum Language : uint8_t {
    OpenCL_C      = 0,
    HCC           = 1,
    OpenMP        = 2,
    OpenCL_CPP    = 3,
  };

  // Language version, encoded as major * 100 + minor * 10.
  enum LanguageVersion : uint16_t {
    V100          = 100,
    V110          = 110,
    V120          = 120,
    V200          = 200,
    V210          = 210,
  };

  namespace KernelArg {
    // Broad classification of a kernel argument.
    enum TypeKind : uint8_t {
      Value     = 0,
      Pointer   = 1,
      Image     = 2,
      Sampler   = 3,
      Queue     = 4,
    };

    // Scalar element type of a kernel argument. Struct is also used for any
    // type the runtime does not recognize.
    enum ValueType : uint16_t {
      Struct  = 0,
      I8      = 1,
      U8      = 2,
      I16     = 3,
      U16     = 4,
      F16     = 5,
      I32     = 6,
      U32     = 7,
      F32     = 8,
      I64     = 9,
      U64     = 10,
      F64     = 11,
    };

    // Access qualifier of a kernel argument. The misspelled enum name is
    // kept because existing code refers to it.
    enum AccessQualifer : uint8_t {
      None       = 0,
      ReadOnly   = 1,
      WriteOnly  = 2,
      ReadWrite  = 3,
    };
  } // namespace KernelArg
} // namespace RuntimeMD
} // namespace AMDGPU
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
|
@ -0,0 +1,848 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
|
||||
|
||||
%struct.A = type { i8, float }
|
||||
%opencl.image1d_t = type opaque
|
||||
%opencl.image2d_t = type opaque
|
||||
%opencl.image3d_t = type opaque
|
||||
%opencl.queue_t = type opaque
|
||||
%opencl.pipe_t = type opaque
|
||||
%struct.B = type { i32 addrspace(1)*}
|
||||
%opencl.clk_event_t = type opaque
|
||||
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .short 256
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 3
|
||||
; CHECK-NEXT: .short 200
|
||||
|
||||
; CHECK-LABEL:{{^}}test_char:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "test_char"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 1
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 1
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .ascii "char"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 1
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_ushort2:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 12
|
||||
; CHECK-NEXT: .ascii "test_ushort2"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 7
|
||||
; CHECK-NEXT: .ascii "ushort2"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 4
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_int3:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "test_int3"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .ascii "int3"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_ulong4:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 11
|
||||
; CHECK-NEXT: .ascii "test_ulong4"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 32
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 32
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 6
|
||||
; CHECK-NEXT: .ascii "ulong4"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 10
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_half8:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 10
|
||||
; CHECK-NEXT: .ascii "test_half8"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "half8"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 5
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_float16:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 12
|
||||
; CHECK-NEXT: .ascii "test_float16"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 64
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 64
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 7
|
||||
; CHECK-NEXT: .ascii "float16"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 8
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_double16:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 13
|
||||
; CHECK-NEXT: .ascii "test_double16"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 128
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 128
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .ascii "double16"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 11
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_pointer:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 12
|
||||
; CHECK-NEXT: .ascii "test_pointer"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_image:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 10
|
||||
; CHECK-NEXT: .ascii "test_image"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "image2d_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_sampler:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 12
|
||||
; CHECK-NEXT: .ascii "test_sampler"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "sampler_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 3
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_queue:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 10
|
||||
; CHECK-NEXT: .ascii "test_queue"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 7
|
||||
; CHECK-NEXT: .ascii "queue_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_struct:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 11
|
||||
; CHECK-NEXT: .ascii "test_struct"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .ascii "struct A"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_i128:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "test_i128"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .ascii "i128"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_multi_arg:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 14
|
||||
; CHECK-NEXT: .ascii "test_multi_arg"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 3
|
||||
; CHECK-NEXT: .ascii "int"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 6
|
||||
; CHECK-NEXT: .ascii "short2"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 3
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "char3"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 1
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_addr_space:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 15
|
||||
; CHECK-NEXT: .ascii "test_addr_space"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 3
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel taking three i32 pointers in address spaces 1 (%g), 2 (%c) and 3 (%l).
; !kernel_arg_addr_space is !50 = {1, 2, 3} and every argument is typed
; "int *" (!51). The CHECK lines above expect `.long 8` for the first two
; pointers, `.long 4` for the addrspace(3) one, and a `.byte 15` entry
; followed by 1, 2 and 3 respectively. The body is just `ret void`.
define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_type_qual:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 14
|
||||
; CHECK-NEXT: .ascii "test_type_qual"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 19
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 17
|
||||
; CHECK-NEXT: .byte 18
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 20
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel whose !kernel_arg_type_qual is !70 = {"volatile", "const restrict",
; "pipe"}. %a and %b are i32 addrspace(1)* and %c is a %opencl.pipe_t
; addrspace(1)*; all three are typed "int *" (!51). The CHECK lines above
; expect an extra `.byte 19` for the first argument, `.byte 17` + `.byte 18`
; for the second and `.byte 20` for the third, right after the type name.
; NOTE(review): !kernel_arg_addr_space is !22 (all zeros) although the pointers
; are addrspace(1), and the checks expect `.byte 15` / `.byte 1`; presumably
; the emitted value comes from the IR pointer type rather than this metadata -
; confirm against the emitter.
define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_access_qual:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .ascii "test_access_qual"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "image1d_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "image2d_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "image3d_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 3
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel taking three OpenCL image pointers (%opencl.image1d_t, image2d_t and
; image3d_t, all addrspace(1)) with access qualifiers "read_only",
; "write_only" and "read_write" (!61) and type names "image1d_t", "image2d_t",
; "image3d_t" (!62). The CHECK lines above expect `.byte 16` followed by 1, 2
; and 3 for the three arguments respectively. The body is just `ret void`.
define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_reqd_wgs_vec_type_hint:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 27
|
||||
; CHECK-NEXT: .ascii "test_reqd_wgs_vec_type_hint"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 3
|
||||
; CHECK-NEXT: .ascii "int"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 21
|
||||
; CHECK-NEXT: .long 1
|
||||
; CHECK-NEXT: .long 2
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 23
|
||||
; CHECK-NEXT: .long 3
|
||||
; CHECK-NEXT: .ascii "int"
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single i32 argument that additionally carries
; !vec_type_hint !5 = {i32 undef, i32 1} and
; !reqd_work_group_size !6 = {1, 2, 4}. After the argument record, the CHECK
; lines above expect `.byte 21` followed by `.long 1`, `.long 2`, `.long 4`,
; then `.byte 23` with the string "int". The body is just `ret void`.
define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_wgs_hint_vec_type_hint:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 27
|
||||
; CHECK-NEXT: .ascii "test_wgs_hint_vec_type_hint"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 3
|
||||
; CHECK-NEXT: .ascii "int"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 22
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .long 32
|
||||
; CHECK-NEXT: .byte 23
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "uint4"
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single i32 argument that additionally carries
; !vec_type_hint !7 = {<4 x i32> undef, i32 0} and
; !work_group_size_hint !8 = {8, 16, 32}. After the argument record, the CHECK
; lines above expect `.byte 22` followed by `.long 8`, `.long 16`, `.long 32`,
; then `.byte 23` with the string "uint4". The body is just `ret void`.
define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_arg_ptr_to_ptr:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 19
|
||||
; CHECK-NEXT: .ascii "test_arg_ptr_to_ptr"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 6
|
||||
; CHECK-NEXT: .ascii "int **"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel whose only argument is an addrspace(1) pointer to an i32 pointer
; (`i32 * addrspace(1)*`), typed "int **" (!80) with address-space metadata
; !81 = {1}. The CHECK lines above expect `.short 6` after `.byte 14` and
; `.byte 1` after `.byte 15` for it. The body is just `ret void`.
define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_arg_struct_contains_ptr:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 28
|
||||
; CHECK-NEXT: .ascii "test_arg_struct_contains_ptr"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .ascii "struct B"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel whose only argument is a `%struct.B *` passed `byval`, typed
; "struct B" (!82). The CHECK lines above expect `.short 0` after `.byte 14`
; and `.byte 0` after `.byte 15` for it. The body is just `ret void`.
; NOTE(review): the test name suggests %struct.B contains a pointer member;
; its definition is not next to this function - confirm where it is declared.
define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_arg_vector_of_ptr:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 22
|
||||
; CHECK-NEXT: .ascii "test_arg_vector_of_ptr"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 47
|
||||
; CHECK-NEXT: .ascii "global int* __attribute__((ext_vector_type(2)))"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel whose only argument is a two-element vector of addrspace(1) i32
; pointers, typed "global int* __attribute__((ext_vector_type(2)))" (!83).
; The CHECK lines above expect `.long 16` twice, `.byte 0` after `.byte 13`,
; `.short 6` after `.byte 14`, and no `.byte 15` entry for this argument.
; The body is just `ret void`.
define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_arg_unknown_builtin_type:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 29
|
||||
; CHECK-NEXT: .ascii "test_arg_unknown_builtin_type"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 11
|
||||
; CHECK-NEXT: .ascii "clk_event_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel whose only argument is a `%opencl.clk_event_t addrspace(1)*`, typed
; "clk_event_t" (!84) with address-space metadata !81 = {1}. The CHECK lines
; above expect `.byte 1` after `.byte 13`, `.short 0` after `.byte 14` and
; `.byte 1` after `.byte 15` for it. The body is just `ret void`.
define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Per-argument metadata for single-argument kernels: address space 0 (!1),
; access qualifier "none" (!2), type name "int" (!3), empty type qualifier (!4).
!1 = !{i32 0}
|
||||
!2 = !{!"none"}
|
||||
!3 = !{!"int"}
|
||||
!4 = !{!""}
|
||||
; Operands for the hint attachments: !5 and !7 are used as !vec_type_hint,
; !6 as !reqd_work_group_size and !8 as !work_group_size_hint. The checks
; expect "int" for !5 and "uint4" for !7, so the second operand (1 vs 0)
; presumably encodes signedness - confirm against the emitter.
!5 = !{i32 undef, i32 1}
|
||||
!6 = !{i32 1, i32 2, i32 4}
|
||||
!7 = !{<4 x i32> undef, i32 0}
|
||||
!8 = !{i32 8, i32 16, i32 32}
|
||||
; Single-entry type-name lists (!9-!21).
; NOTE(review): presumably referenced by the per-type argument tests earlier
; in the file - confirm.
!9 = !{!"char"}
|
||||
!10 = !{!"ushort2"}
|
||||
!11 = !{!"int3"}
|
||||
!12 = !{!"ulong4"}
|
||||
!13 = !{!"half8"}
|
||||
!14 = !{!"float16"}
|
||||
!15 = !{!"double16"}
|
||||
!16 = !{!"int *"}
|
||||
!17 = !{!"image2d_t"}
|
||||
!18 = !{!"sampler_t"}
|
||||
!19 = !{!"queue_t"}
|
||||
!20 = !{!"struct A"}
|
||||
!21 = !{!"i128"}
|
||||
; Metadata for three-argument kernels: address spaces (!22), access
; qualifiers (!23), type names for test_multi_arg (!24) and empty type
; qualifiers (!25).
!22 = !{i32 0, i32 0, i32 0}
|
||||
!23 = !{!"none", !"none", !"none"}
|
||||
!24 = !{!"int", !"short2", !"char3"}
|
||||
!25 = !{!"", !"", !""}
|
||||
; test_addr_space: address spaces 1, 2, 3 (!50); !51 names all three
; arguments "int *" and is also used by test_type_qual.
!50 = !{i32 1, i32 2, i32 3}
|
||||
!51 = !{!"int *", !"int *", !"int *"}
|
||||
; test_access_qual: address spaces (!60), access qualifiers (!61) and image
; type names (!62).
!60 = !{i32 1, i32 1, i32 1}
|
||||
!61 = !{!"read_only", !"write_only", !"read_write"}
|
||||
!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
|
||||
; test_type_qual: per-argument type qualifiers.
!70 = !{!"volatile", !"const restrict", !"pipe"}
|
||||
; Single-argument tests for pointer-to-pointer (!80), struct (!82),
; vector-of-pointer (!83) and clk_event_t (!84) arguments; !81 is the
; address-space list {1} used by the pointer-to-pointer and clk_event_t tests.
!80 = !{!"int **"}
|
||||
!81 = !{i32 1}
|
||||
!82 = !{!"struct B"}
|
||||
!83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
|
||||
!84 = !{!"clk_event_t"}
|
||||
; Module-level named metadata; !90 = {2, 0}, presumably OpenCL version 2.0.
!opencl.ocl.version = !{!90}
|
||||
!90 = !{i32 2, i32 0}
|
Loading…
Reference in New Issue