forked from OSchip/llvm-project
[AMDGPU] gfx1030 RT support
Differential Revision: https://reviews.llvm.org/D87782
This commit is contained in:
parent
56069b5c71
commit
91f503c3af
|
@ -1698,6 +1698,14 @@ class AMDGPUGlobalAtomicRtn<LLVMType vt> : Intrinsic <
|
||||||
|
|
||||||
def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn<llvm_i32_ty>;
|
def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn<llvm_i32_ty>;
|
||||||
|
|
||||||
|
// uint4 llvm.amdgcn.image.bvh.intersect.ray <node_ptr>, <ray_extent>, <ray_origin>,
|
||||||
|
//                                           <ray_dir>, <ray_inv_dir>, <texture_descr>
// The intrinsic is overloaded on two operand types: <node_ptr> (any integer
// type) and <ray_dir> (any vector type). <ray_inv_dir> is declared with
// LLVMMatchType<1>, i.e. it must have the same type as the second overloaded
// type (<ray_dir>). <ray_extent> is a float, <ray_origin> a v4f32 and
// <texture_descr> a v4i32; the result is a v4i32. The intrinsic is marked as
// only reading memory (IntrReadMem).
|
||||||
|
def int_amdgcn_image_bvh_intersect_ray :
|
||||||
|
Intrinsic<[llvm_v4i32_ty],
|
||||||
|
[llvm_anyint_ty, llvm_float_ty, llvm_v4f32_ty, llvm_anyvector_ty,
|
||||||
|
LLVMMatchType<1>, llvm_v4i32_ty],
|
||||||
|
[IntrReadMem]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Deep learning intrinsics.
|
// Deep learning intrinsics.
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
|
@ -1444,6 +1444,7 @@ public:
|
||||||
void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
|
void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
|
||||||
bool IsAtomic = false);
|
bool IsAtomic = false);
|
||||||
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
|
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
|
||||||
|
void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
|
||||||
|
|
||||||
OperandMatchResultTy parseDim(OperandVector &Operands);
|
OperandMatchResultTy parseDim(OperandVector &Operands);
|
||||||
OperandMatchResultTy parseDPP8(OperandVector &Operands);
|
OperandMatchResultTy parseDPP8(OperandVector &Operands);
|
||||||
|
@ -3109,8 +3110,9 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
|
||||||
int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
|
int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
|
||||||
|
|
||||||
assert(VDataIdx != -1);
|
assert(VDataIdx != -1);
|
||||||
assert(DMaskIdx != -1);
|
|
||||||
assert(TFEIdx != -1);
|
if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
|
||||||
|
return true;
|
||||||
|
|
||||||
unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
|
unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
|
||||||
unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
|
unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
|
||||||
|
@ -3137,6 +3139,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
|
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
|
||||||
|
|
||||||
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
|
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
|
||||||
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
|
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
|
||||||
int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
|
int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
|
||||||
|
@ -3145,9 +3148,11 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
|
||||||
|
|
||||||
assert(VAddr0Idx != -1);
|
assert(VAddr0Idx != -1);
|
||||||
assert(SrsrcIdx != -1);
|
assert(SrsrcIdx != -1);
|
||||||
assert(DimIdx != -1);
|
|
||||||
assert(SrsrcIdx > VAddr0Idx);
|
assert(SrsrcIdx > VAddr0Idx);
|
||||||
|
|
||||||
|
if (DimIdx == -1)
|
||||||
|
return true; // intersect_ray
|
||||||
|
|
||||||
unsigned Dim = Inst.getOperand(DimIdx).getImm();
|
unsigned Dim = Inst.getOperand(DimIdx).getImm();
|
||||||
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
|
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
|
||||||
bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
|
bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
|
||||||
|
@ -6466,6 +6471,17 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
|
||||||
cvtMIMG(Inst, Operands, true);
|
cvtMIMG(Inst, Operands, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Convert the parsed operands of an intersect-ray instruction into MCInst
/// operands.
///
/// Every parsed operand after index 0 that is a register is appended to
/// \p Inst as a register operand; all non-register operands are ignored.
/// Finally an immediate 1 is appended for the a16 operand.
/// NOTE(review): index 0 is skipped, presumably because it holds the mnemonic
/// token - confirm against the other cvt* converters.
void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
                                      const OperandVector &Operands) {
  const unsigned NumParsed = Operands.size();
  for (unsigned Idx = 1; Idx < NumParsed; ++Idx) {
    AMDGPUOperand &Parsed = static_cast<AMDGPUOperand &>(*Operands[Idx]);
    if (!Parsed.isReg())
      continue;
    Parsed.addRegOperands(Inst, 1);
  }

  // The a16 modifier is not taken from the parsed operand list; it is always
  // emitted as an enabled (1) immediate.
  Inst.addOperand(MCOperand::createImm(1)); // a16
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// smrd
|
// smrd
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
|
@ -139,6 +139,8 @@ DECODE_OPERAND_REG(VS_128)
|
||||||
DECODE_OPERAND_REG(VReg_64)
|
DECODE_OPERAND_REG(VReg_64)
|
||||||
DECODE_OPERAND_REG(VReg_96)
|
DECODE_OPERAND_REG(VReg_96)
|
||||||
DECODE_OPERAND_REG(VReg_128)
|
DECODE_OPERAND_REG(VReg_128)
|
||||||
|
DECODE_OPERAND_REG(VReg_256)
|
||||||
|
DECODE_OPERAND_REG(VReg_512)
|
||||||
|
|
||||||
DECODE_OPERAND_REG(SReg_32)
|
DECODE_OPERAND_REG(SReg_32)
|
||||||
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
|
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
|
||||||
|
@ -499,8 +501,16 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
|
||||||
AMDGPU::OpName::d16);
|
AMDGPU::OpName::d16);
|
||||||
|
|
||||||
assert(VDataIdx != -1);
|
assert(VDataIdx != -1);
|
||||||
assert(DMaskIdx != -1);
|
if (DMaskIdx == -1 || TFEIdx == -1) {// intersect_ray
|
||||||
assert(TFEIdx != -1);
|
if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) {
|
||||||
|
assert(MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa ||
|
||||||
|
MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa ||
|
||||||
|
MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa ||
|
||||||
|
MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa);
|
||||||
|
addOperand(MI, MCOperand::createImm(1));
|
||||||
|
}
|
||||||
|
return MCDisassembler::Success;
|
||||||
|
}
|
||||||
|
|
||||||
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
|
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
|
||||||
bool IsAtomic = (VDstIdx != -1);
|
bool IsAtomic = (VDstIdx != -1);
|
||||||
|
|
|
@ -708,6 +708,55 @@ multiclass MIMG_Gather <bits<8> op, AMDGPUSampleVariant sample, bit wqm = 0,
|
||||||
multiclass MIMG_Gather_WQM <bits<8> op, AMDGPUSampleVariant sample>
|
multiclass MIMG_Gather_WQM <bits<8> op, AMDGPUSampleVariant sample>
|
||||||
: MIMG_Gather<op, sample, 1>;
|
: MIMG_Gather<op, sample, 1>;
|
||||||
|
|
||||||
|
// GFX10 intersect-ray MIMG instruction taking all address operands as one
// register operand ($vaddr0 of class AddrRC). The result is written to a
// VReg_128 $vdata and the resource descriptor is an SReg_128 $srsrc.
// When A16 is 1 an additional GFX10A16 $a16 operand is appended to the input
// list and to the assembly string; otherwise neither is present.
class MIMG_IntersectRay_gfx10<int op, string opcode, RegisterClass AddrRC, bit A16>
|
||||||
|
: MIMG_gfx10<op, (outs VReg_128:$vdata), "AMDGPU"> {
|
||||||
|
|
||||||
|
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc),
|
||||||
|
!if(!eq(A16,1), (ins GFX10A16:$a16), (ins)));
|
||||||
|
let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(!eq(A16,1), "$a16", "");
|
||||||
|
|
||||||
|
// This form never uses the NSA encoding.
let nsa = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// NSA counterpart of the GFX10 intersect-ray MIMG instruction: the address
// operands and their assembly text come from the nsah helper (nsah.AddrIns /
// nsah.AddrAsm) for num_addrs addresses, followed by the SReg_128 $srsrc
// resource descriptor. When A16 is 1 an additional GFX10A16 $a16 operand is
// appended to the input list and to the assembly string.
class MIMG_IntersectRay_nsa_gfx10<int op, string opcode, int num_addrs, bit A16>
|
||||||
|
: MIMG_nsa_gfx10<op, (outs VReg_128:$vdata), num_addrs, "AMDGPU"> {
|
||||||
|
let InOperandList = !con(nsah.AddrIns,
|
||||||
|
(ins SReg_128:$srsrc),
|
||||||
|
!if(!eq(A16,1), (ins GFX10A16:$a16), (ins)));
|
||||||
|
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(!eq(A16,1), "$a16", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defines one intersect-ray instruction family: the base opcode record plus
// the "_sa" (single address register tuple) and "_nsa" (separate address
// registers) GFX10 encodings.
//   op        - MIMG opcode value passed to both encodings.
//   opcode    - assembly mnemonic.
//   num_addrs - number of address dwords the instruction consumes.
//   A16       - when 1, the a16 field is set, the $a16 operand is added and
//               cvtIntersectRay is used as the asm match converter.
// All other MIMG encoding fields are fixed here because the instruction has
// no corresponding operands.
multiclass MIMG_IntersectRay<int op, string opcode, int num_addrs, bit A16> {
  def "" : MIMGBaseOpcode;
  let SubtargetPredicate = HasGFX10_BEncoding,
      AssemblerPredicate = HasGFX10_BEncoding,
      AsmMatchConverter = !if(!eq(A16,1), "cvtIntersectRay", ""),
      dmask = 0xf,
      unorm = 1,
      d16 = 0,
      glc = 0,
      slc = 0,
      dlc = 0,
      tfe = 0,
      lwe = 0,
      r128 = 1,
      ssamp = 0,
      dim = {0, 0, 0},
      a16 = A16,
      BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
      VDataDwords = 4 in {
    // TODO: MIMGAddrSize will choose VReg_512 which is a 16 register tuple,
    // when we only need 9, 11 or 12 depending on A16 field and ptr size.
    def "_sa" : MIMG_IntersectRay_gfx10<op, opcode, MIMGAddrSize<num_addrs, 0>.RegClass, A16> {
      // VAddrDwords reflects the chosen register class, not num_addrs:
      // the class Size shifted right by 5 (presumably bits -> dwords).
      let VAddrDwords = !srl(MIMGAddrSize<num_addrs, 0>.RegClass.Size, 5);
    }
    def _nsa : MIMG_IntersectRay_nsa_gfx10<op, opcode, num_addrs, A16> {
      let VAddrDwords = num_addrs;
    }
  }
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// MIMG Instructions
|
// MIMG Instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -832,6 +881,11 @@ defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <0x000000ef, AMDGPUSample_c_cd_cl
|
||||||
let SubtargetPredicate = HasGFX10_BEncoding in
|
let SubtargetPredicate = HasGFX10_BEncoding in
|
||||||
defm IMAGE_MSAA_LOAD : MIMG_NoSampler <0x00000080, "image_msaa_load", 1>;
|
defm IMAGE_MSAA_LOAD : MIMG_NoSampler <0x00000080, "image_msaa_load", 1>;
|
||||||
|
|
||||||
|
// Ray intersection instructions. The MIMG_IntersectRay arguments are
// <opcode, mnemonic, num_addrs, A16>. The plain and "_a16" records of each
// pair share the same opcode and mnemonic and differ only in the A16 bit and
// the address count (11 vs 8 for BVH, 12 vs 9 for BVH64).
defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<0xe6, "image_bvh_intersect_ray", 11, 0>;
|
||||||
|
defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<0xe6, "image_bvh_intersect_ray", 8, 1>;
|
||||||
|
defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<0xe7, "image_bvh64_intersect_ray", 12, 0>;
|
||||||
|
defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<0xe7, "image_bvh64_intersect_ray", 9, 1>;
|
||||||
|
|
||||||
/********** ========================================= **********/
|
/********** ========================================= **********/
|
||||||
/********** Table of dimension-aware image intrinsics **********/
|
/********** Table of dimension-aware image intrinsics **********/
|
||||||
/********** ========================================= **********/
|
/********** ========================================= **********/
|
||||||
|
|
|
@ -80,9 +80,8 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
|
||||||
MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
|
MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
|
||||||
MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
|
MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
|
||||||
|
|
||||||
// Check for instructions that don't have tfe or lwe fields
|
if (!TFE && !LWE) // intersect_ray
|
||||||
// There shouldn't be any at this point.
|
continue;
|
||||||
assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
|
|
||||||
|
|
||||||
unsigned TFEVal = TFE->getImm();
|
unsigned TFEVal = TFE->getImm();
|
||||||
unsigned LWEVal = LWE->getImm();
|
unsigned LWEVal = LWE->getImm();
|
||||||
|
|
|
@ -1194,6 +1194,17 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||||
MachineMemOperand::MOVolatile;
|
MachineMemOperand::MOVolatile;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
|
||||||
|
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||||
|
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||||
|
Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT?
|
||||||
|
Info.ptrVal = MFI->getImagePSV(
|
||||||
|
*MF.getSubtarget<GCNSubtarget>().getInstrInfo(), CI.getArgOperand(5));
|
||||||
|
Info.align.reset();
|
||||||
|
Info.flags = MachineMemOperand::MOLoad |
|
||||||
|
MachineMemOperand::MODereferenceable;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
case Intrinsic::amdgcn_ds_gws_init:
|
case Intrinsic::amdgcn_ds_gws_init:
|
||||||
case Intrinsic::amdgcn_ds_gws_barrier:
|
case Intrinsic::amdgcn_ds_gws_barrier:
|
||||||
case Intrinsic::amdgcn_ds_gws_sema_v:
|
case Intrinsic::amdgcn_ds_gws_sema_v:
|
||||||
|
@ -7318,6 +7329,76 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||||
DAG.getVTList(VT, MVT::Other), Ops,
|
DAG.getVTList(VT, MVT::Other), Ops,
|
||||||
M->getMemOperand());
|
M->getMemOperand());
|
||||||
}
|
}
|
||||||
|
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
|
||||||
|
SDLoc DL(Op);
|
||||||
|
MemSDNode *M = cast<MemSDNode>(Op);
|
||||||
|
SDValue NodePtr = M->getOperand(2);
|
||||||
|
SDValue RayExtent = M->getOperand(3);
|
||||||
|
SDValue RayOrigin = M->getOperand(4);
|
||||||
|
SDValue RayDir = M->getOperand(5);
|
||||||
|
SDValue RayInvDir = M->getOperand(6);
|
||||||
|
SDValue TDescr = M->getOperand(7);
|
||||||
|
|
||||||
|
assert(NodePtr.getValueType() == MVT::i32 ||
|
||||||
|
NodePtr.getValueType() == MVT::i64);
|
||||||
|
assert(RayDir.getValueType() == MVT::v4f16 ||
|
||||||
|
RayDir.getValueType() == MVT::v4f32);
|
||||||
|
|
||||||
|
bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
|
||||||
|
bool Is64 = NodePtr.getValueType() == MVT::i64;
|
||||||
|
unsigned Opcode = IsA16 ? Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa
|
||||||
|
: AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa
|
||||||
|
: Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa
|
||||||
|
: AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa;
|
||||||
|
|
||||||
|
SmallVector<SDValue, 16> Ops;
|
||||||
|
|
||||||
|
auto packLanes = [&DAG, &Ops, &DL] (SDValue Op, bool IsAligned) {
|
||||||
|
SmallVector<SDValue, 3> Lanes;
|
||||||
|
DAG.ExtractVectorElements(Op, Lanes, 0, 3);
|
||||||
|
if (Lanes[0].getValueSizeInBits() == 32) {
|
||||||
|
for (unsigned I = 0; I < 3; ++I)
|
||||||
|
Ops.push_back(DAG.getBitcast(MVT::i32, Lanes[I]));
|
||||||
|
} else {
|
||||||
|
if (IsAligned) {
|
||||||
|
Ops.push_back(
|
||||||
|
DAG.getBitcast(MVT::i32,
|
||||||
|
DAG.getBuildVector(MVT::v2f16, DL,
|
||||||
|
{ Lanes[0], Lanes[1] })));
|
||||||
|
Ops.push_back(Lanes[2]);
|
||||||
|
} else {
|
||||||
|
SDValue Elt0 = Ops.pop_back_val();
|
||||||
|
Ops.push_back(
|
||||||
|
DAG.getBitcast(MVT::i32,
|
||||||
|
DAG.getBuildVector(MVT::v2f16, DL,
|
||||||
|
{ Elt0, Lanes[0] })));
|
||||||
|
Ops.push_back(
|
||||||
|
DAG.getBitcast(MVT::i32,
|
||||||
|
DAG.getBuildVector(MVT::v2f16, DL,
|
||||||
|
{ Lanes[1], Lanes[2] })));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if (Is64)
|
||||||
|
DAG.ExtractVectorElements(DAG.getBitcast(MVT::v2i32, NodePtr), Ops, 0, 2);
|
||||||
|
else
|
||||||
|
Ops.push_back(NodePtr);
|
||||||
|
|
||||||
|
Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent));
|
||||||
|
packLanes(RayOrigin, true);
|
||||||
|
packLanes(RayDir, true);
|
||||||
|
packLanes(RayInvDir, false);
|
||||||
|
Ops.push_back(TDescr);
|
||||||
|
if (IsA16)
|
||||||
|
Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1));
|
||||||
|
Ops.push_back(M->getChain());
|
||||||
|
|
||||||
|
auto *NewNode = DAG.getMachineNode(Opcode, DL, M->getVTList(), Ops);
|
||||||
|
MachineMemOperand *MemRef = M->getMemOperand();
|
||||||
|
DAG.setNodeMemRefs(NewNode, {MemRef});
|
||||||
|
return SDValue(NewNode, 0);
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
|
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
|
||||||
AMDGPU::getImageDimIntrinsicInfo(IntrID))
|
AMDGPU::getImageDimIntrinsicInfo(IntrID))
|
||||||
|
@ -10963,7 +11044,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
|
||||||
unsigned Opcode = Node->getMachineOpcode();
|
unsigned Opcode = Node->getMachineOpcode();
|
||||||
|
|
||||||
if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
|
if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
|
||||||
!TII->isGather4(Opcode)) {
|
!TII->isGather4(Opcode) &&
|
||||||
|
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) != -1) {
|
||||||
return adjustWritemask(Node, DAG);
|
return adjustWritemask(Node, DAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -393,6 +393,15 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
|
||||||
case AMDGPU::DS_WRITE_B64:
|
case AMDGPU::DS_WRITE_B64:
|
||||||
case AMDGPU::DS_WRITE_B64_gfx9:
|
case AMDGPU::DS_WRITE_B64_gfx9:
|
||||||
return DS_WRITE;
|
return DS_WRITE;
|
||||||
|
case AMDGPU::IMAGE_BVH_INTERSECT_RAY_sa:
|
||||||
|
case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_sa:
|
||||||
|
case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa:
|
||||||
|
case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa:
|
||||||
|
case AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa:
|
||||||
|
case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa:
|
||||||
|
case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa:
|
||||||
|
case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa:
|
||||||
|
return UNKNOWN;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -272,8 +272,8 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
|
||||||
// enabled
|
// enabled
|
||||||
int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
|
int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
|
||||||
int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
|
int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
|
||||||
unsigned TFEVal = MI.getOperand(TFEIdx).getImm();
|
unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
|
||||||
unsigned LWEVal = MI.getOperand(LWEIdx).getImm();
|
unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
|
||||||
int ToUntie = -1;
|
int ToUntie = -1;
|
||||||
if (TFEVal || LWEVal) {
|
if (TFEVal || LWEVal) {
|
||||||
// TFE/LWE is enabled so we need to deal with an implicit tied operand
|
// TFE/LWE is enabled so we need to deal with an implicit tied operand
|
||||||
|
|
|
@ -0,0 +1,162 @@
|
||||||
|
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||||
|
|
||||||
|
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(uint node_ptr, float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, uint4 texture_descr)
|
||||||
|
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(uint node_ptr, float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, uint4 texture_descr)
|
||||||
|
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(ulong node_ptr, float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, uint4 texture_descr)
|
||||||
|
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(ulong node_ptr, float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, uint4 texture_descr)
|
||||||
|
|
||||||
|
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32, float, <4 x float>, <4 x float>, <4 x float>, <4 x i32>)
|
||||||
|
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32, float, <4 x float>, <4 x half>, <4 x half>, <4 x i32>)
|
||||||
|
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64, float, <4 x float>, <4 x float>, <4 x float>, <4 x i32>)
|
||||||
|
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <4 x float>, <4 x half>, <4 x half>, <4 x i32>)
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}image_bvh_intersect_ray:
|
||||||
|
; GCN: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]{{$}}
|
||||||
|
; Arguments are flattened to represent the actual VGPR_A layout, so we have no
|
||||||
|
; extra moves in the generated kernel.
|
||||||
|
define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
|
||||||
|
main_body:
|
||||||
|
%ray_origin0 = insertelement <4 x float> undef, float %ray_origin_x, i32 0
|
||||||
|
%ray_origin1 = insertelement <4 x float> %ray_origin0, float %ray_origin_y, i32 1
|
||||||
|
%ray_origin = insertelement <4 x float> %ray_origin1, float %ray_origin_z, i32 2
|
||||||
|
%ray_dir0 = insertelement <4 x float> undef, float %ray_dir_x, i32 0
|
||||||
|
%ray_dir1 = insertelement <4 x float> %ray_dir0, float %ray_dir_y, i32 1
|
||||||
|
%ray_dir = insertelement <4 x float> %ray_dir1, float %ray_dir_z, i32 2
|
||||||
|
%ray_inv_dir0 = insertelement <4 x float> undef, float %ray_inv_dir_x, i32 0
|
||||||
|
%ray_inv_dir1 = insertelement <4 x float> %ray_inv_dir0, float %ray_inv_dir_y, i32 1
|
||||||
|
%ray_inv_dir = insertelement <4 x float> %ray_inv_dir1, float %ray_inv_dir_z, i32 2
|
||||||
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
||||||
|
%r = bitcast <4 x i32> %v to <4 x float>
|
||||||
|
ret <4 x float> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}image_bvh_intersect_ray_a16:
|
||||||
|
; GCN: image_bvh_intersect_ray v[0:3], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}}
|
||||||
|
define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16(i32 inreg %node_ptr, float inreg %ray_extent, <4 x float> inreg %ray_origin, <4 x half> inreg %ray_dir, <4 x half> inreg %ray_inv_dir, <4 x i32> inreg %tdescr) {
|
||||||
|
main_body:
|
||||||
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
||||||
|
%r = bitcast <4 x i32> %v to <4 x float>
|
||||||
|
ret <4 x float> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}image_bvh64_intersect_ray:
|
||||||
|
; GCN: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]{{$}}
|
||||||
|
; Arguments are flattened to represent the actual VGPR_A layout, so we have no
|
||||||
|
; extra moves in the generated kernel.
|
||||||
|
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(<2 x i32> %node_ptr_vec, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
|
||||||
|
main_body:
|
||||||
|
%node_ptr = bitcast <2 x i32> %node_ptr_vec to i64
|
||||||
|
%ray_origin0 = insertelement <4 x float> undef, float %ray_origin_x, i32 0
|
||||||
|
%ray_origin1 = insertelement <4 x float> %ray_origin0, float %ray_origin_y, i32 1
|
||||||
|
%ray_origin = insertelement <4 x float> %ray_origin1, float %ray_origin_z, i32 2
|
||||||
|
%ray_dir0 = insertelement <4 x float> undef, float %ray_dir_x, i32 0
|
||||||
|
%ray_dir1 = insertelement <4 x float> %ray_dir0, float %ray_dir_y, i32 1
|
||||||
|
%ray_dir = insertelement <4 x float> %ray_dir1, float %ray_dir_z, i32 2
|
||||||
|
%ray_inv_dir0 = insertelement <4 x float> undef, float %ray_inv_dir_x, i32 0
|
||||||
|
%ray_inv_dir1 = insertelement <4 x float> %ray_inv_dir0, float %ray_inv_dir_y, i32 1
|
||||||
|
%ray_inv_dir = insertelement <4 x float> %ray_inv_dir1, float %ray_inv_dir_z, i32 2
|
||||||
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
||||||
|
%r = bitcast <4 x i32> %v to <4 x float>
|
||||||
|
ret <4 x float> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}image_bvh64_intersect_ray_a16:
|
||||||
|
; GCN: image_bvh64_intersect_ray v[0:3], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}}
|
||||||
|
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 inreg %node_ptr, float inreg %ray_extent, <4 x float> inreg %ray_origin, <4 x half> inreg %ray_dir, <4 x half> inreg %ray_inv_dir, <4 x i32> inreg %tdescr) {
|
||||||
|
main_body:
|
||||||
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
||||||
|
%r = bitcast <4 x i32> %v to <4 x float>
|
||||||
|
ret <4 x float> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; TODO: NSA reassign is very limited and cannot work with VGPR tuples and subregs.
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}image_bvh_intersect_ray_nsa_reassign:
|
||||||
|
; GCN: image_bvh_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
||||||
|
define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) {
|
||||||
|
main_body:
|
||||||
|
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||||
|
%gep_node_ptr = getelementptr inbounds i32, i32* %p_node_ptr, i32 %lid
|
||||||
|
%node_ptr = load i32, i32* %gep_node_ptr, align 4
|
||||||
|
%gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid
|
||||||
|
%ray_extent = load float, float* %gep_ray, align 4
|
||||||
|
%ray_origin0 = insertelement <4 x float> undef, float 0.0, i32 0
|
||||||
|
%ray_origin1 = insertelement <4 x float> %ray_origin0, float 1.0, i32 1
|
||||||
|
%ray_origin = insertelement <4 x float> %ray_origin1, float 2.0, i32 2
|
||||||
|
%ray_dir0 = insertelement <4 x float> undef, float 3.0, i32 0
|
||||||
|
%ray_dir1 = insertelement <4 x float> %ray_dir0, float 4.0, i32 1
|
||||||
|
%ray_dir = insertelement <4 x float> %ray_dir1, float 5.0, i32 2
|
||||||
|
%ray_inv_dir0 = insertelement <4 x float> undef, float 6.0, i32 0
|
||||||
|
%ray_inv_dir1 = insertelement <4 x float> %ray_inv_dir0, float 7.0, i32 1
|
||||||
|
%ray_inv_dir = insertelement <4 x float> %ray_inv_dir1, float 8.0, i32 2
|
||||||
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
||||||
|
store <4 x i32> %v, <4 x i32>* undef
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}image_bvh_intersect_ray_a16_nsa_reassign:
|
||||||
|
; GCN: image_bvh_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}}
|
||||||
|
define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) {
|
||||||
|
main_body:
|
||||||
|
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||||
|
%gep_node_ptr = getelementptr inbounds i32, i32* %p_node_ptr, i32 %lid
|
||||||
|
%node_ptr = load i32, i32* %gep_node_ptr, align 4
|
||||||
|
%gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid
|
||||||
|
%ray_extent = load float, float* %gep_ray, align 4
|
||||||
|
%ray_origin0 = insertelement <4 x float> undef, float 0.0, i32 0
|
||||||
|
%ray_origin1 = insertelement <4 x float> %ray_origin0, float 1.0, i32 1
|
||||||
|
%ray_origin = insertelement <4 x float> %ray_origin1, float 2.0, i32 2
|
||||||
|
%ray_dir0 = insertelement <4 x half> undef, half 3.0, i32 0
|
||||||
|
%ray_dir1 = insertelement <4 x half> %ray_dir0, half 4.0, i32 1
|
||||||
|
%ray_dir = insertelement <4 x half> %ray_dir1, half 5.0, i32 2
|
||||||
|
%ray_inv_dir0 = insertelement <4 x half> undef, half 6.0, i32 0
|
||||||
|
%ray_inv_dir1 = insertelement <4 x half> %ray_inv_dir0, half 7.0, i32 1
|
||||||
|
%ray_inv_dir = insertelement <4 x half> %ray_inv_dir1, half 8.0, i32 2
|
||||||
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
||||||
|
store <4 x i32> %v, <4 x i32>* undef
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}image_bvh64_intersect_ray_nsa_reassign:
|
||||||
|
; GCN: image_bvh64_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
||||||
|
define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) {
|
||||||
|
main_body:
|
||||||
|
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||||
|
%gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid
|
||||||
|
%ray_extent = load float, float* %gep_ray, align 4
|
||||||
|
%ray_origin0 = insertelement <4 x float> undef, float 0.0, i32 0
|
||||||
|
%ray_origin1 = insertelement <4 x float> %ray_origin0, float 1.0, i32 1
|
||||||
|
%ray_origin = insertelement <4 x float> %ray_origin1, float 2.0, i32 2
|
||||||
|
%ray_dir0 = insertelement <4 x float> undef, float 3.0, i32 0
|
||||||
|
%ray_dir1 = insertelement <4 x float> %ray_dir0, float 4.0, i32 1
|
||||||
|
%ray_dir = insertelement <4 x float> %ray_dir1, float 5.0, i32 2
|
||||||
|
%ray_inv_dir0 = insertelement <4 x float> undef, float 6.0, i32 0
|
||||||
|
%ray_inv_dir1 = insertelement <4 x float> %ray_inv_dir0, float 7.0, i32 1
|
||||||
|
%ray_inv_dir = insertelement <4 x float> %ray_inv_dir1, float 8.0, i32 2
|
||||||
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 1111111111111, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
||||||
|
store <4 x i32> %v, <4 x i32>* undef
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}image_bvh64_intersect_ray_a16_nsa_reassign:
|
||||||
|
; GCN: image_bvh64_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}}
|
||||||
|
define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) {
|
||||||
|
main_body:
|
||||||
|
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||||
|
%gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid
|
||||||
|
%ray_extent = load float, float* %gep_ray, align 4
|
||||||
|
%ray_origin0 = insertelement <4 x float> undef, float 0.0, i32 0
|
||||||
|
%ray_origin1 = insertelement <4 x float> %ray_origin0, float 1.0, i32 1
|
||||||
|
%ray_origin = insertelement <4 x float> %ray_origin1, float 2.0, i32 2
|
||||||
|
%ray_dir0 = insertelement <4 x half> undef, half 3.0, i32 0
|
||||||
|
%ray_dir1 = insertelement <4 x half> %ray_dir0, half 4.0, i32 1
|
||||||
|
%ray_dir = insertelement <4 x half> %ray_dir1, half 5.0, i32 2
|
||||||
|
%ray_inv_dir0 = insertelement <4 x half> undef, half 6.0, i32 0
|
||||||
|
%ray_inv_dir1 = insertelement <4 x half> %ray_inv_dir0, half 7.0, i32 1
|
||||||
|
%ray_inv_dir = insertelement <4 x half> %ray_inv_dir1, half 8.0, i32 2
|
||||||
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 1111111111110, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
||||||
|
store <4 x i32> %v, <4 x i32>* undef
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i32 @llvm.amdgcn.workitem.id.x()
|
|
@ -23,16 +23,16 @@ v_fma_legacy_f32 v0, v1, v2, v3
|
||||||
// GFX10: error: instruction not supported on this GPU
|
// GFX10: error: instruction not supported on this GPU
|
||||||
|
|
||||||
image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
|
image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
|
||||||
// GFX10: error: invalid instruction
|
// GFX10: error: instruction not supported on this GPU
|
||||||
|
|
||||||
image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
|
image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
|
||||||
// GFX10: error: invalid instruction
|
// GFX10: error: invalid operand
|
||||||
|
|
||||||
image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7]
|
image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7]
|
||||||
// GFX10: error: invalid instruction
|
// GFX10: error: instruction not supported on this GPU
|
||||||
|
|
||||||
image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16
|
image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16
|
||||||
// GFX10: error: invalid instruction
|
// GFX10: error: invalid operand
|
||||||
|
|
||||||
image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
|
image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||||
// GFX10: error: not a valid operand.
|
// GFX10: error: not a valid operand.
|
||||||
|
|
|
@ -61,6 +61,30 @@ v_fma_legacy_f32 v0, v1, |v2|, -v3
|
||||||
v_fma_legacy_f32 v0, s1, 2.0, -v3
|
v_fma_legacy_f32 v0, s1, 2.0, -v3
|
||||||
// GFX10: encoding: [0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84]
|
// GFX10: encoding: [0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84]
|
||||||
|
|
||||||
|
image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
|
||||||
|
// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00]
|
||||||
|
|
||||||
|
image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
|
||||||
|
// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40]
|
||||||
|
|
||||||
|
image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7]
|
||||||
|
// GFX10: encoding: [0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x00]
|
||||||
|
|
||||||
|
image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16
|
||||||
|
// GFX10: encoding: [0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x40]
|
||||||
|
|
||||||
|
image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15]
|
||||||
|
// GFX10: encoding: [0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00]
|
||||||
|
|
||||||
|
image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] a16
|
||||||
|
// GFX10: encoding: [0x05,0x9f,0x98,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x00]
|
||||||
|
|
||||||
|
image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40, v42], s[12:15]
|
||||||
|
// GFX10: encoding: [0x07,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x2a,0x00]
|
||||||
|
|
||||||
|
image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19], s[12:15] a16
|
||||||
|
// GFX10: encoding: [0x05,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13]
|
||||||
|
|
||||||
image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
|
image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||||
// GFX10: encoding: [0x01,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00]
|
// GFX10: encoding: [0x01,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00]
|
||||||
|
|
||||||
|
|
|
@ -52,6 +52,30 @@
|
||||||
# GFX10: v_fma_legacy_f32 v0, s1, 2.0, -v3
|
# GFX10: v_fma_legacy_f32 v0, s1, 2.0, -v3
|
||||||
0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84
|
0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84
|
||||||
|
|
||||||
|
# GFX10: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
|
||||||
|
0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00
|
||||||
|
|
||||||
|
# GFX10: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
|
||||||
|
0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40
|
||||||
|
|
||||||
|
# GFX10: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7]
|
||||||
|
0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x00
|
||||||
|
|
||||||
|
# GFX10: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16
|
||||||
|
0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x40
|
||||||
|
|
||||||
|
# GFX10: image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15]
|
||||||
|
0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00
|
||||||
|
|
||||||
|
# GFX10: image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] a16
|
||||||
|
0x05,0x9f,0x98,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x00
|
||||||
|
|
||||||
|
# GFX10: image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40, v42], s[12:15]
|
||||||
|
0x07,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x2a,0x00
|
||||||
|
|
||||||
|
# GFX10: image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19], s[12:15] a16
|
||||||
|
0x05,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13
|
||||||
|
|
||||||
# GFX10: image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
|
# GFX10: image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
|
||||||
0x01,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00
|
0x01,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue