forked from OSchip/llvm-project
Re-commit [AMDGPU] Add metadata for runtime
Attempting to fix lit test failure on ppc. llvm-svn: 275676
This commit is contained in:
parent
c755599997
commit
a711cc7951
|
@ -39,7 +39,9 @@
|
|||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Target/TargetLoweringObjectFile.h"
|
||||
#include "AMDGPURuntimeMetadata.h"
|
||||
|
||||
using namespace ::AMDGPU;
|
||||
using namespace llvm;
|
||||
|
||||
// TODO: This should get the default rounding mode from the kernel. We just set
|
||||
|
@ -111,6 +113,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
|
|||
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
|
||||
TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
|
||||
"AMD", "AMDGPU");
|
||||
emitStartOfRuntimeMetadata(M);
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
||||
|
@ -244,6 +247,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
|||
}
|
||||
}
|
||||
|
||||
emitRuntimeMetadata(*MF.getFunction());
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -740,3 +745,227 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
|
|||
*TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Emit a key and an integer value for runtime metadata.
|
||||
static void emitRuntimeMDIntValue(std::unique_ptr<MCStreamer> &Streamer,
|
||||
RuntimeMD::Key K, uint64_t V,
|
||||
unsigned Size) {
|
||||
Streamer->EmitIntValue(K, 1);
|
||||
Streamer->EmitIntValue(V, Size);
|
||||
}
|
||||
|
||||
// Emit a key and a string value for runtime metadata.
|
||||
static void emitRuntimeMDStringValue(std::unique_ptr<MCStreamer> &Streamer,
|
||||
RuntimeMD::Key K, StringRef S) {
|
||||
Streamer->EmitIntValue(K, 1);
|
||||
Streamer->EmitIntValue(S.size(), 4);
|
||||
Streamer->EmitBytes(S);
|
||||
}
|
||||
|
||||
// Emit a key and three integer values for runtime metadata.
|
||||
// The three integer values are obtained from MDNode \p Node;
|
||||
static void emitRuntimeMDThreeIntValues(std::unique_ptr<MCStreamer> &Streamer,
|
||||
RuntimeMD::Key K, MDNode *Node,
|
||||
unsigned Size) {
|
||||
Streamer->EmitIntValue(K, 1);
|
||||
Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
|
||||
Node->getOperand(0))->getZExtValue(), Size);
|
||||
Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
|
||||
Node->getOperand(1))->getZExtValue(), Size);
|
||||
Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
|
||||
Node->getOperand(2))->getZExtValue(), Size);
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
|
||||
OutStreamer->SwitchSection(getObjFileLowering().getContext()
|
||||
.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
|
||||
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion,
|
||||
RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
|
||||
if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
|
||||
RuntimeMD::OpenCL_C, 1);
|
||||
auto Node = MD->getOperand(0);
|
||||
unsigned short Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
|
||||
->getZExtValue();
|
||||
unsigned short Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
|
||||
->getZExtValue();
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
|
||||
Major * 100 + Minor * 10, 2);
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the OpenCL C spelling of the LLVM type \p Ty.
///
/// Vector types yield the element type name followed by the element count.
/// half, float and double map to the names of the same spelling. Integers of
/// width 8/16/32/64 map to char/short/int/long, any other width N maps to
/// "iN"; when \p isSigned is false the integer name is prefixed with 'u'.
/// Any other type is treated as unreachable.
static std::string getOCLTypeName(Type *Ty, bool isSigned) {
  if (auto *Vec = dyn_cast<VectorType>(Ty)) {
    std::string ElemName = getOCLTypeName(Vec->getElementType(), isSigned);
    return (Twine(ElemName) + Twine(Vec->getVectorNumElements())).str();
  }

  if (Ty->isHalfTy())
    return "half";
  if (Ty->isFloatTy())
    return "float";
  if (Ty->isDoubleTy())
    return "double";

  if (auto *Int = dyn_cast<IntegerType>(Ty)) {
    // The unsigned name is the signed name with a 'u' prefix.
    if (!isSigned) {
      std::string SignedName = getOCLTypeName(Ty, true);
      return (Twine('u') + Twine(SignedName)).str();
    }

    const unsigned Bits = Int->getIntegerBitWidth();
    if (Bits == 8)
      return "char";
    if (Bits == 16)
      return "short";
    if (Bits == 32)
      return "int";
    if (Bits == 64)
      return "long";
    return (Twine('i') + Twine(Bits)).str();
  }

  llvm_unreachable("invalid type");
}
|
||||
|
||||
/// Classify \p Ty as a runtime metadata value type.
///
/// Vector and pointer types are classified by their element type. half, float
/// and double map to F16, F32 and F64. Integers of width 8/16/32/64 map to
/// the U* variant when \p TypeName starts with "u" and to the I* variant
/// otherwise. Every other type, including integers of other widths, is
/// reported as Struct.
static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
    Type *Ty, StringRef TypeName) {
  if (auto *Vec = dyn_cast<VectorType>(Ty))
    return getRuntimeMDValueType(Vec->getElementType(), TypeName);
  if (auto *Ptr = dyn_cast<PointerType>(Ty))
    return getRuntimeMDValueType(Ptr->getElementType(), TypeName);

  if (Ty->isHalfTy())
    return RuntimeMD::KernelArg::F16;
  if (Ty->isFloatTy())
    return RuntimeMD::KernelArg::F32;
  if (Ty->isDoubleTy())
    return RuntimeMD::KernelArg::F64;

  auto *Int = dyn_cast<IntegerType>(Ty);
  if (!Int)
    return RuntimeMD::KernelArg::Struct;

  const bool IsUnsigned = TypeName.startswith("u");
  switch (Int->getIntegerBitWidth()) {
  case 8:
    return IsUnsigned ? RuntimeMD::KernelArg::U8 : RuntimeMD::KernelArg::I8;
  case 16:
    return IsUnsigned ? RuntimeMD::KernelArg::U16 : RuntimeMD::KernelArg::I16;
  case 32:
    return IsUnsigned ? RuntimeMD::KernelArg::U32 : RuntimeMD::KernelArg::I32;
  case 64:
    return IsUnsigned ? RuntimeMD::KernelArg::U64 : RuntimeMD::KernelArg::I64;
  default:
    // The runtime does not recognize other integer widths; report them as
    // struct type.
    return RuntimeMD::KernelArg::Struct;
  }
}
|
||||
|
||||
void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) {
|
||||
if (!F.getMetadata("kernel_arg_type"))
|
||||
return;
|
||||
|
||||
MCContext &Context = getObjFileLowering().getContext();
|
||||
OutStreamer->SwitchSection(
|
||||
Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
|
||||
OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
|
||||
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyKernelName, F.getName());
|
||||
|
||||
for (auto &Arg:F.args()) {
|
||||
// Emit KeyArgBegin.
|
||||
unsigned I = Arg.getArgNo();
|
||||
OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1);
|
||||
|
||||
// Emit KeyArgSize and KeyArgAlign.
|
||||
auto T = Arg.getType();
|
||||
auto DL = F.getParent()->getDataLayout();
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize,
|
||||
DL.getTypeAllocSize(T), 4);
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign,
|
||||
DL.getABITypeAlignment(T), 4);
|
||||
|
||||
// Emit KeyArgTypeName.
|
||||
auto TypeName = dyn_cast<MDString>(F.getMetadata(
|
||||
"kernel_arg_type")->getOperand(I))->getString();
|
||||
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName);
|
||||
|
||||
// Emit KeyArgName.
|
||||
if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) {
|
||||
auto ArgName = cast<MDString>(ArgNameMD->getOperand(
|
||||
I))->getString();
|
||||
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName);
|
||||
}
|
||||
|
||||
// Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
|
||||
auto TypeQual = cast<MDString>(F.getMetadata(
|
||||
"kernel_arg_type_qual")->getOperand(I))->getString();
|
||||
SmallVector<StringRef, 1> SplitQ;
|
||||
TypeQual.split(SplitQ, " ", -1, false/* drop empty entry*/);
|
||||
for (auto &I:SplitQ) {
|
||||
auto Key = StringSwitch<RuntimeMD::Key>(I)
|
||||
.Case("volatile", RuntimeMD::KeyArgIsVolatile)
|
||||
.Case("restrict", RuntimeMD::KeyArgIsRestrict)
|
||||
.Case("const", RuntimeMD::KeyArgIsConst)
|
||||
.Case("pipe", RuntimeMD::KeyArgIsPipe)
|
||||
.Default(RuntimeMD::KeyNull);
|
||||
OutStreamer->EmitIntValue(Key, 1);
|
||||
}
|
||||
|
||||
// Emit KeyArgTypeKind.
|
||||
auto BaseTypeName = cast<MDString>(
|
||||
F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString();
|
||||
auto TypeKind = StringSwitch<RuntimeMD::KernelArg::TypeKind>(BaseTypeName)
|
||||
.Case("sampler_t", RuntimeMD::KernelArg::Sampler)
|
||||
.Case("queue_t", RuntimeMD::KernelArg::Queue)
|
||||
.Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
|
||||
"image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image)
|
||||
.Cases("image2d_depth_t", "image2d_array_depth_t",
|
||||
"image2d_msaa_t", "image2d_array_msaa_t",
|
||||
"image2d_msaa_depth_t", RuntimeMD::KernelArg::Image)
|
||||
.Cases("image2d_array_msaa_depth_t", "image3d_t",
|
||||
RuntimeMD::KernelArg::Image)
|
||||
.Default(isa<PointerType>(T) ? RuntimeMD::KernelArg::Pointer :
|
||||
RuntimeMD::KernelArg::Value);
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1);
|
||||
|
||||
// Emit KeyArgValueType.
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType,
|
||||
getRuntimeMDValueType(T, BaseTypeName), 2);
|
||||
|
||||
// Emit KeyArgAccQual.
|
||||
auto AccQual = cast<MDString>(F.getMetadata(
|
||||
"kernel_arg_access_qual")->getOperand(I))->getString();
|
||||
auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
|
||||
.Case("read_only", RuntimeMD::KernelArg::ReadOnly)
|
||||
.Case("write_only", RuntimeMD::KernelArg::WriteOnly)
|
||||
.Case("read_write", RuntimeMD::KernelArg::ReadWrite)
|
||||
.Default(RuntimeMD::KernelArg::None);
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual,
|
||||
AQ, 1);
|
||||
|
||||
// Emit KeyArgAddrQual.
|
||||
if (isa<PointerType>(T))
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual,
|
||||
T->getPointerAddressSpace(), 1);
|
||||
|
||||
// Emit KeyArgEnd
|
||||
OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1);
|
||||
}
|
||||
|
||||
// Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
|
||||
if (auto RWGS = F.getMetadata("reqd_work_group_size"))
|
||||
emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyReqdWorkGroupSize,
|
||||
RWGS, 4);
|
||||
if (auto WGSH = F.getMetadata("work_group_size_hint"))
|
||||
emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyWorkGroupSizeHint,
|
||||
WGSH, 4);
|
||||
if (auto VTH = F.getMetadata("vec_type_hint")) {
|
||||
auto TypeName = getOCLTypeName(cast<ValueAsMetadata>(
|
||||
VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
|
||||
VTH->getOperand(1))->getZExtValue());
|
||||
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyVecTypeHint,
|
||||
TypeName);
|
||||
}
|
||||
|
||||
// Emit KeyKernelEnd
|
||||
OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
|
||||
}
|
||||
|
|
|
@ -127,6 +127,10 @@ public:
|
|||
unsigned AsmVariant, const char *ExtraCode,
|
||||
raw_ostream &O) override;
|
||||
|
||||
void emitStartOfRuntimeMetadata(const Module &M);
|
||||
|
||||
void emitRuntimeMetadata(const Function &F);
|
||||
|
||||
protected:
|
||||
std::vector<std::string> DisasmLines, HexLines;
|
||||
size_t DisasmLineMaxLen;
|
||||
|
|
|
@ -0,0 +1,138 @@
|
|||
//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
///
|
||||
/// Enums and structure types used by runtime metadata.
|
||||
///
|
||||
/// Runtime requests certain information (metadata) about kernels to be able
|
||||
/// to execute the kernels and answer the queries about the kernels.
|
||||
/// The metadata is represented as a byte stream in an ELF section of a
|
||||
/// binary (code object). The byte stream consists of key-value pairs.
|
||||
/// Each key is an 8 bit unsigned integer. Each value can be an integer,
|
||||
/// a string, or a stream of key-value pairs. There are 3 levels of key-value
|
||||
/// pair streams. At the beginning of the ELF section is the top level
|
||||
/// key-value pair stream. A kernel-level key-value pair stream starts after
|
||||
/// encountering KeyKernelBegin and ends immediately before encountering
|
||||
/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts
|
||||
/// after encountering KeyArgBegin and ends immediately before encountering
|
||||
/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top
|
||||
/// level key-value pair stream. A kernel-argument-level key-value pair stream
|
||||
/// can only appear in a kernel-level key-value pair stream.
|
||||
///
|
||||
/// The format should be kept backward compatible. New enum values and bit
|
||||
/// fields should be appended at the end. It is suggested to bump up the
|
||||
/// revision number whenever the format changes and document the change
|
||||
/// in the revision in this header.
|
||||
///
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace AMDGPU {

namespace RuntimeMD {

  // Version and revision of runtime metadata
  const unsigned char MDVersion   = 1;
  const unsigned char MDRevision  = 0;

  // ELF section name containing runtime metadata
  const char SectionName[] = ".AMDGPU.runtime_metadata";

  // Enumeration values of keys in runtime metadata.
  // Every key is emitted as one byte. The Begin/End keys and the KeyArgIs*
  // qualifier keys stand alone; the remaining keys are followed by a value.
  enum Key {
    KeyNull                     = 0, // Place holder. Ignored when encountered
    KeyMDVersion                = 1, // Runtime metadata version
    KeyLanguage                 = 2, // Language
    KeyLanguageVersion          = 3, // Language version
    KeyKernelBegin              = 4, // Beginning of kernel-level stream
    KeyKernelEnd                = 5, // End of kernel-level stream
    KeyKernelName               = 6, // Kernel name
    KeyArgBegin                 = 7, // Beginning of kernel-arg-level stream
    KeyArgEnd                   = 8, // End of kernel-arg-level stream
    KeyArgSize                  = 9, // Kernel argument size
    KeyArgAlign                 = 10, // Kernel argument alignment
    KeyArgTypeName              = 11, // Kernel argument type name
    KeyArgName                  = 12, // Kernel argument name
    KeyArgTypeKind              = 13, // Kernel argument type kind
    KeyArgValueType             = 14, // Kernel argument value type
    KeyArgAddrQual              = 15, // Kernel argument address qualifier
    KeyArgAccQual               = 16, // Kernel argument access qualifier
    KeyArgIsConst               = 17, // Kernel argument is const qualified
    KeyArgIsRestrict            = 18, // Kernel argument is restrict qualified
    KeyArgIsVolatile            = 19, // Kernel argument is volatile qualified
    KeyArgIsPipe                = 20, // Kernel argument is pipe qualified
    KeyReqdWorkGroupSize        = 21, // Required work group size
    KeyWorkGroupSizeHint        = 22, // Work group size hint
    KeyVecTypeHint              = 23, // Vector type hint
    KeyKernelIndex              = 24, // Kernel index for device enqueue
    KeySGPRs                    = 25, // Number of SGPRs
    KeyVGPRs                    = 26, // Number of VGPRs
    KeyMinWavesPerSIMD          = 27, // Minimum number of waves per SIMD
    KeyMaxWavesPerSIMD          = 28, // Maximum number of waves per SIMD
    KeyFlatWorkGroupSizeLimits  = 29, // Flat work group size limits
    KeyMaxWorkGroupSize         = 30, // Maximum work group size
    KeyNoPartialWorkGroups      = 31, // No partial work groups
  };

  // Source language of the module; the value of KeyLanguage.
  enum Language : uint8_t {
    OpenCL_C      = 0,
    HCC           = 1,
    OpenMP        = 2,
    OpenCL_CPP    = 3,
  };

  // Language version; the value of KeyLanguageVersion.
  // Encoded as major * 100 + minor * 10.
  enum LanguageVersion : uint16_t {
    V100          = 100,
    V110          = 110,
    V120          = 120,
    V200          = 200,
    V210          = 210,
  };

  namespace KernelArg {
    // Kind of a kernel argument; the value of KeyArgTypeKind.
    enum TypeKind : uint8_t {
      Value     = 0,
      Pointer   = 1,
      Image     = 2,
      Sampler   = 3,
      Queue     = 4,
    };

    // Scalar value type of a kernel argument; the value of KeyArgValueType.
    enum ValueType : uint16_t {
      Struct  = 0,
      I8      = 1,
      U8      = 2,
      I16     = 3,
      U16     = 4,
      F16     = 5,
      I32     = 6,
      U32     = 7,
      F32     = 8,
      I64     = 9,
      U64     = 10,
      F64     = 11,
    };

    // Access qualifier of a kernel argument; the value of KeyArgAccQual.
    // The spelling "AccessQualifer" is kept as is: the name is part of the
    // interface used by code that includes this header.
    enum AccessQualifer : uint8_t {
      None       = 0,
      ReadOnly   = 1,
      WriteOnly  = 2,
      ReadWrite  = 3,
    };
  } // namespace KernelArg
} // namespace RuntimeMD
} // namespace AMDGPU
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
|
@ -0,0 +1,848 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
|
||||
|
||||
%struct.A = type { i8, float }
|
||||
%opencl.image1d_t = type opaque
|
||||
%opencl.image2d_t = type opaque
|
||||
%opencl.image3d_t = type opaque
|
||||
%opencl.queue_t = type opaque
|
||||
%opencl.pipe_t = type opaque
|
||||
%struct.B = type { i32 addrspace(1)*}
|
||||
%opencl.clk_event_t = type opaque
|
||||
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .short 256
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 3
|
||||
; CHECK-NEXT: .short 200
|
||||
|
||||
; CHECK-LABEL:{{^}}test_char:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "test_char"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 1
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 1
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .ascii "char"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 1
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single i8 parameter. The FileCheck directives preceding it pin
; the runtime metadata byte stream expected for this kernel.
define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_ushort2:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 12
|
||||
; CHECK-NEXT: .ascii "test_ushort2"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 7
|
||||
; CHECK-NEXT: .ascii "ushort2"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 4
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single <2 x i16> parameter. The FileCheck directives preceding
; it pin the runtime metadata byte stream expected for this kernel.
define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_int3:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "test_int3"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .ascii "int3"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single <3 x i32> parameter. The FileCheck directives preceding
; it pin the runtime metadata byte stream expected for this kernel.
define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_ulong4:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 11
|
||||
; CHECK-NEXT: .ascii "test_ulong4"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 32
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 32
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 6
|
||||
; CHECK-NEXT: .ascii "ulong4"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 10
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single <4 x i64> parameter. The FileCheck directives preceding
; it pin the runtime metadata byte stream expected for this kernel.
define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_half8:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 10
|
||||
; CHECK-NEXT: .ascii "test_half8"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "half8"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 5
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single <8 x half> parameter. The FileCheck directives preceding
; it pin the runtime metadata byte stream expected for this kernel.
define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_float16:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 12
|
||||
; CHECK-NEXT: .ascii "test_float16"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 64
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 64
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 7
|
||||
; CHECK-NEXT: .ascii "float16"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 8
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single <16 x float> parameter. The FileCheck directives
; preceding it pin the runtime metadata byte stream expected for this kernel.
define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_double16:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 13
|
||||
; CHECK-NEXT: .ascii "test_double16"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 128
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 128
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .ascii "double16"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 11
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single <16 x double> parameter. The FileCheck directives
; preceding it pin the runtime metadata byte stream expected for this kernel.
define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_pointer:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 12
|
||||
; CHECK-NEXT: .ascii "test_pointer"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single i32 pointer parameter in address space 1. The FileCheck
; directives preceding it pin the runtime metadata byte stream expected for
; this kernel.
define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_image:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 10
|
||||
; CHECK-NEXT: .ascii "test_image"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "image2d_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single pointer to the opaque %opencl.image2d_t type in address
; space 1. The FileCheck directives preceding it pin the runtime metadata byte
; stream expected for this kernel.
define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_sampler:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 12
|
||||
; CHECK-NEXT: .ascii "test_sampler"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "sampler_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 3
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single i32 parameter; the expected output above names its type
; "sampler_t". The FileCheck directives preceding it pin the runtime metadata
; byte stream expected for this kernel.
define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_queue:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 10
|
||||
; CHECK-NEXT: .ascii "test_queue"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 7
|
||||
; CHECK-NEXT: .ascii "queue_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single pointer to the opaque %opencl.queue_t type in address
; space 1. The FileCheck directives preceding it pin the runtime metadata byte
; stream expected for this kernel.
define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_struct:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 11
|
||||
; CHECK-NEXT: .ascii "test_struct"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .ascii "struct A"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single %struct.A parameter passed byval through a pointer. The
; FileCheck directives preceding it pin the runtime metadata byte stream
; expected for this kernel.
define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_i128:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "test_i128"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .ascii "i128"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with a single i128 parameter, an integer width outside 8/16/32/64. The
; FileCheck directives preceding it pin the runtime metadata byte stream
; expected for this kernel.
define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_multi_arg:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 14
|
||||
; CHECK-NEXT: .ascii "test_multi_arg"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 3
|
||||
; CHECK-NEXT: .ascii "int"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 6
|
||||
; CHECK-NEXT: .ascii "short2"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 3
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "char3"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 1
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel with three parameters (i32, <2 x i16>, <3 x i8>). The FileCheck
; directives preceding it expect one argument-level group per parameter, in
; declaration order.
define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_addr_space:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 15
|
||||
; CHECK-NEXT: .ascii "test_addr_space"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 3
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel stub with three i32 pointer arguments in address spaces 1, 2 and 3
; (%g, %c, %l). kernel_arg_addr_space is !50 and all three arguments are named
; "int *" through !51. The body only returns.
define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_type_qual:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 14
|
||||
; CHECK-NEXT: .ascii "test_type_qual"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 19
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 17
|
||||
; CHECK-NEXT: .byte 18
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "int *"
|
||||
; CHECK-NEXT: .byte 20
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel stub exercising kernel_arg_type_qual (!70): the three arguments carry
; the qualifier strings "volatile", "const restrict" and "pipe" respectively.
; The first two arguments are i32 addrspace(1) pointers; the third is a
; %opencl.pipe_t addrspace(1) pointer. The body only returns.
; NOTE(review): kernel_arg_addr_space is !22 (all zeros) although every
; pointer here is addrspace(1), and !51 names the pipe argument "int *" --
; confirm this mismatch is intentional for the test.
define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_access_qual:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .ascii "test_access_qual"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "image1d_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "image2d_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 9
|
||||
; CHECK-NEXT: .ascii "image3d_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 2
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 3
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel stub exercising kernel_arg_access_qual (!61): the three image
; arguments (%ro, %wo, %rw) are tagged "read_only", "write_only" and
; "read_write". Their type names come from !62 ("image1d_t", "image2d_t",
; "image3d_t") and all are addrspace(1) pointers. The body only returns.
define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_reqd_wgs_vec_type_hint:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 27
|
||||
; CHECK-NEXT: .ascii "test_reqd_wgs_vec_type_hint"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 3
|
||||
; CHECK-NEXT: .ascii "int"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 21
|
||||
; CHECK-NEXT: .long 1
|
||||
; CHECK-NEXT: .long 2
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 23
|
||||
; CHECK-NEXT: .long 3
|
||||
; CHECK-NEXT: .ascii "int"
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel stub with a single i32 argument that additionally carries
; !vec_type_hint (!5: i32 undef, i32 1) and !reqd_work_group_size
; (!6: 1, 2, 4). The expectations preceding this function list the three
; sizes as .long 1 / 2 / 4 and the hint string "int". The body only returns.
define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_wgs_hint_vec_type_hint:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 27
|
||||
; CHECK-NEXT: .ascii "test_wgs_hint_vec_type_hint"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 3
|
||||
; CHECK-NEXT: .ascii "int"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 22
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .long 32
|
||||
; CHECK-NEXT: .byte 23
|
||||
; CHECK-NEXT: .long 5
|
||||
; CHECK-NEXT: .ascii "uint4"
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel stub with a single i32 argument that additionally carries
; !vec_type_hint (!7: <4 x i32> undef, i32 0) and !work_group_size_hint
; (!8: 8, 16, 32). The expectations preceding this function list the three
; sizes as .long 8 / 16 / 32 and the hint string "uint4". The body only
; returns.
define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_arg_ptr_to_ptr:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 19
|
||||
; CHECK-NEXT: .ascii "test_arg_ptr_to_ptr"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 6
|
||||
; CHECK-NEXT: .ascii "int **"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel stub whose only argument is an addrspace(1) pointer to a
; default-address-space i32 pointer; the type name is "int **" (!80) and
; kernel_arg_addr_space is !81. The body only returns.
define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_arg_struct_contains_ptr:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 28
|
||||
; CHECK-NEXT: .ascii "test_arg_struct_contains_ptr"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .ascii "struct B"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel stub whose only argument is a %struct.B passed byval; the type name
; is "struct B" (!82). The definition of %struct.B is not in this part of the
; file -- presumably it has a pointer member, per the function name; confirm.
; The body only returns.
define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_arg_vector_of_ptr:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 22
|
||||
; CHECK-NEXT: .ascii "test_arg_vector_of_ptr"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 16
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 47
|
||||
; CHECK-NEXT: .ascii "global int* __attribute__((ext_vector_type(2)))"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 6
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel stub whose only argument is a two-element vector of i32 addrspace(1)
; pointers, passed by value. Its type name (!83) is the 47-character string
; "global int* __attribute__((ext_vector_type(2)))". The body only returns.
define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL:{{^}}test_arg_unknown_builtin_type:
|
||||
; CHECK: .section .AMDGPU.runtime_metadata
|
||||
; CHECK-NEXT: .byte 4
|
||||
; CHECK-NEXT: .byte 6
|
||||
; CHECK-NEXT: .long 29
|
||||
; CHECK-NEXT: .ascii "test_arg_unknown_builtin_type"
|
||||
; CHECK-NEXT: .byte 7
|
||||
; CHECK-NEXT: .byte 9
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 10
|
||||
; CHECK-NEXT: .long 8
|
||||
; CHECK-NEXT: .byte 11
|
||||
; CHECK-NEXT: .long 11
|
||||
; CHECK-NEXT: .ascii "clk_event_t"
|
||||
; CHECK-NEXT: .byte 13
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 14
|
||||
; CHECK-NEXT: .short 0
|
||||
; CHECK-NEXT: .byte 16
|
||||
; CHECK-NEXT: .byte 0
|
||||
; CHECK-NEXT: .byte 15
|
||||
; CHECK-NEXT: .byte 1
|
||||
; CHECK-NEXT: .byte 8
|
||||
; CHECK-NEXT: .byte 5
|
||||
|
||||
; Kernel stub whose only argument is a %opencl.clk_event_t addrspace(1)
; pointer, with type name "clk_event_t" (!84) and kernel_arg_addr_space !81.
; The body only returns.
define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Metadata nodes attached to the kernels of this test.
;
; !1-!4: single-argument nodes used as kernel_arg_addr_space,
; kernel_arg_access_qual, kernel_arg_type / kernel_arg_base_type and
; kernel_arg_type_qual respectively.
!1 = !{i32 0}
|
||||
!2 = !{!"none"}
|
||||
!3 = !{!"int"}
|
||||
!4 = !{!""}
|
||||
; !5-!8: vec_type_hint (!5, !7), reqd_work_group_size (!6) and
; work_group_size_hint (!8) operands.
!5 = !{i32 undef, i32 1}
|
||||
!6 = !{i32 1, i32 2, i32 4}
|
||||
!7 = !{<4 x i32> undef, i32 0}
|
||||
!8 = !{i32 8, i32 16, i32 32}
|
||||
; !9-!21: single-argument type-name strings. No kernel in this part of the
; file references them; presumably they are used by kernels defined earlier
; in the file -- confirm.
!9 = !{!"char"}
|
||||
!10 = !{!"ushort2"}
|
||||
!11 = !{!"int3"}
|
||||
!12 = !{!"ulong4"}
|
||||
!13 = !{!"half8"}
|
||||
!14 = !{!"float16"}
|
||||
!15 = !{!"double16"}
|
||||
!16 = !{!"int *"}
|
||||
!17 = !{!"image2d_t"}
|
||||
!18 = !{!"sampler_t"}
|
||||
!19 = !{!"queue_t"}
|
||||
!20 = !{!"struct A"}
|
||||
!21 = !{!"i128"}
|
||||
; !22-!25: three-argument nodes (address spaces, access qualifiers, type
; names, type qualifiers) used by @test_multi_arg and shared in part with the
; other three-argument kernels.
!22 = !{i32 0, i32 0, i32 0}
|
||||
!23 = !{!"none", !"none", !"none"}
|
||||
!24 = !{!"int", !"short2", !"char3"}
|
||||
!25 = !{!"", !"", !""}
|
||||
; !50-!51: address spaces and type names for @test_addr_space (!51 is also
; the type-name node of @test_type_qual).
!50 = !{i32 1, i32 2, i32 3}
|
||||
!51 = !{!"int *", !"int *", !"int *"}
|
||||
; !60-!62: address spaces, access qualifiers and type names for
; @test_access_qual.
!60 = !{i32 1, i32 1, i32 1}
|
||||
!61 = !{!"read_only", !"write_only", !"read_write"}
|
||||
!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
|
||||
; !70: type qualifiers for @test_type_qual.
!70 = !{!"volatile", !"const restrict", !"pipe"}
|
||||
; !80-!84: type names (and the address-space-1 node !81) for the
; single-argument @test_arg_* kernels.
!80 = !{!"int **"}
|
||||
!81 = !{i32 1}
|
||||
!82 = !{!"struct B"}
|
||||
!83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
|
||||
!84 = !{!"clk_event_t"}
|
||||
; Module-level named metadata; !90 holds the pair (2, 0).
!opencl.ocl.version = !{!90}
|
||||
!90 = !{i32 2, i32 0}
|
Loading…
Reference in New Issue