[AMDGPU] Add support for new LLVM vector types
Add VReg, AReg and SReg register classes on AMDGPU for the bit widths 288, 320, 352 and 384.

Differential Revision: https://reviews.llvm.org/D138205
parent 68057c2b8d
commit 595a08847a
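For orientation, each new register class is a tuple of 9–12 consecutive 32-bit registers. Below is a small standalone sketch (editorial, not part of the commit) of the dword/byte arithmetic that recurs throughout the patch:

```cpp
#include <cstdio>

// Each AMDGPU 32-bit register (VGPR/SGPR/AGPR) holds one dword, so a
// width of N bits needs N/32 registers in a tuple and N/8 bytes when
// spilled to memory.
int main() {
  for (unsigned Bits : {288u, 320u, 352u, 384u}) {
    unsigned Dwords = Bits / 32; // 9, 10, 11, 12 registers per tuple
    unsigned Bytes = Bits / 8;   // 36, 40, 44, 48 bytes per spill slot
    std::printf("VReg_%u / SReg_%u / AReg_%u: %u dwords, %u bytes\n",
                Bits, Bits, Bits, Dwords, Bytes);
  }
}
```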
@@ -34,12 +34,24 @@ enum PartialMappingIdx {
   PM_SGPR96 = 23,
   PM_VGPR96 = 24,
   PM_AGPR96 = 25,
-  PM_AGPR32 = 31,
-  PM_AGPR64 = 32,
-  PM_AGPR128 = 33,
-  PM_AGPR256 = 34,
-  PM_AGPR512 = 35,
-  PM_AGPR1024 = 36
+  PM_SGPR288 = 26,
+  PM_VGPR288 = 27,
+  PM_AGPR288 = 28,
+  PM_SGPR320 = 29,
+  PM_VGPR320 = 30,
+  PM_AGPR320 = 31,
+  PM_SGPR352 = 32,
+  PM_VGPR352 = 33,
+  PM_AGPR352 = 34,
+  PM_SGPR384 = 35,
+  PM_VGPR384 = 36,
+  PM_AGPR384 = 37,
+  PM_AGPR32 = 38,
+  PM_AGPR64 = 39,
+  PM_AGPR128 = 40,
+  PM_AGPR256 = 41,
+  PM_AGPR512 = 42,
+  PM_AGPR1024 = 43
 };
 
 const RegisterBankInfo::PartialMapping PartMappings[] {
@@ -66,6 +78,18 @@ const RegisterBankInfo::PartialMapping PartMappings[] {
   {0, 96, SGPRRegBank},
   {0, 96, VGPRRegBank},
   {0, 96, AGPRRegBank},
+  {0, 288, SGPRRegBank},
+  {0, 288, VGPRRegBank},
+  {0, 288, AGPRRegBank},
+  {0, 320, SGPRRegBank},
+  {0, 320, VGPRRegBank},
+  {0, 320, AGPRRegBank},
+  {0, 352, SGPRRegBank},
+  {0, 352, VGPRRegBank},
+  {0, 352, AGPRRegBank},
+  {0, 384, SGPRRegBank},
+  {0, 384, VGPRRegBank},
+  {0, 384, AGPRRegBank},
 
   {0, 32, AGPRRegBank}, // AGPR begin
   {0, 64, AGPRRegBank},
@@ -107,6 +131,18 @@ const RegisterBankInfo::ValueMapping ValMappings[] {
   {&PartMappings[17], 1},
   {&PartMappings[18], 1},
   {&PartMappings[19], 1},
+  {&PartMappings[20], 1},
+  {&PartMappings[21], 1},
+  {&PartMappings[22], 1},
+  {&PartMappings[23], 1},
+  {&PartMappings[24], 1},
+  {&PartMappings[25], 1},
+  {&PartMappings[26], 1},
+  {&PartMappings[27], 1},
+  {&PartMappings[28], 1},
+  {&PartMappings[29], 1},
+  {&PartMappings[30], 1},
+  {&PartMappings[31], 1},
 
   // AGPRs
   {nullptr, 0},
@@ -114,12 +150,12 @@ const RegisterBankInfo::ValueMapping ValMappings[] {
   {nullptr, 0},
   {nullptr, 0},
   {nullptr, 0},
-  {&PartMappings[20], 1}, // 32
-  {&PartMappings[21], 1}, // 64
-  {&PartMappings[22], 1}, // 128
-  {&PartMappings[23], 1}, // 256
-  {&PartMappings[24], 1}, // 512
-  {&PartMappings[25], 1}  // 1024
+  {&PartMappings[32], 1}, // 32
+  {&PartMappings[33], 1}, // 64
+  {&PartMappings[34], 1}, // 128
+  {&PartMappings[35], 1}, // 256
+  {&PartMappings[36], 1}, // 512
+  {&PartMappings[37], 1}  // 1024
 };
 
 const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
@@ -148,7 +184,7 @@ const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] {
 enum ValueMappingIdx {
   SGPRStartIdx = 1,
   VGPRStartIdx = 12,
-  AGPRStartIdx = 26
+  AGPRStartIdx = 38
 };
 
 const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
@@ -175,6 +211,62 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
     default: llvm_unreachable("Invalid register bank");
     }
     break;
+  case 288:
+    switch (BankID) {
+    case AMDGPU::VGPRRegBankID:
+      Idx = PM_VGPR288;
+      break;
+    case AMDGPU::SGPRRegBankID:
+      Idx = PM_SGPR288;
+      break;
+    case AMDGPU::AGPRRegBankID:
+      Idx = PM_AGPR288;
+      break;
+    default: llvm_unreachable("Invalid register bank");
+    }
+    break;
+  case 320:
+    switch (BankID) {
+    case AMDGPU::VGPRRegBankID:
+      Idx = PM_VGPR320;
+      break;
+    case AMDGPU::SGPRRegBankID:
+      Idx = PM_SGPR320;
+      break;
+    case AMDGPU::AGPRRegBankID:
+      Idx = PM_AGPR320;
+      break;
+    default: llvm_unreachable("Invalid register bank");
+    }
+    break;
+  case 352:
+    switch (BankID) {
+    case AMDGPU::VGPRRegBankID:
+      Idx = PM_VGPR352;
+      break;
+    case AMDGPU::SGPRRegBankID:
+      Idx = PM_SGPR352;
+      break;
+    case AMDGPU::AGPRRegBankID:
+      Idx = PM_AGPR352;
+      break;
+    default: llvm_unreachable("Invalid register bank");
+    }
+    break;
+  case 384:
+    switch (BankID) {
+    case AMDGPU::VGPRRegBankID:
+      Idx = PM_VGPR384;
+      break;
+    case AMDGPU::SGPRRegBankID:
+      Idx = PM_SGPR384;
+      break;
+    case AMDGPU::AGPRRegBankID:
+      Idx = PM_AGPR384;
+      break;
+    default: llvm_unreachable("Invalid register bank");
+    }
+    break;
   default:
     switch (BankID) {
     case AMDGPU::VGPRRegBankID:
@@ -84,6 +84,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
 
+  setOperationAction(ISD::LOAD, MVT::v9f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v9f32, MVT::v9i32);
+
+  setOperationAction(ISD::LOAD, MVT::v10f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v10f32, MVT::v10i32);
+
+  setOperationAction(ISD::LOAD, MVT::v11f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v11f32, MVT::v11i32);
+
+  setOperationAction(ISD::LOAD, MVT::v12f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v12f32, MVT::v12i32);
+
   setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
 
@@ -196,6 +208,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STORE, MVT::v8f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
 
+  setOperationAction(ISD::STORE, MVT::v9f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v9f32, MVT::v9i32);
+
+  setOperationAction(ISD::STORE, MVT::v10f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v10f32, MVT::v10i32);
+
+  setOperationAction(ISD::STORE, MVT::v11f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v11f32, MVT::v11i32);
+
+  setOperationAction(ISD::STORE, MVT::v12f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v12f32, MVT::v12i32);
+
   setOperationAction(ISD::STORE, MVT::v16f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
 
@@ -325,19 +349,23 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FSUB, MVT::f64, Expand);
 
   setOperationAction(ISD::CONCAT_VECTORS,
-                     {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32,
-                      MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
-                      MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32},
+                     {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32,
+                      MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
+                      MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32,
+                      MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32,
+                      MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},
                      Custom);
   setOperationAction(
       ISD::EXTRACT_SUBVECTOR,
       {MVT::v2f16, MVT::v2i16, MVT::v4f16, MVT::v4i16, MVT::v2f32,
       MVT::v2i32, MVT::v3f32, MVT::v3i32, MVT::v4f32, MVT::v4i32,
       MVT::v5f32, MVT::v5i32, MVT::v6f32, MVT::v6i32, MVT::v7f32,
-      MVT::v7i32, MVT::v8f32, MVT::v8i32, MVT::v16f16, MVT::v16i16,
-      MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32, MVT::v2f64,
-      MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64, MVT::v4i64,
-      MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64},
+      MVT::v7i32, MVT::v8f32, MVT::v8i32, MVT::v9f32, MVT::v9i32,
+      MVT::v10i32, MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32,
+      MVT::v12f32, MVT::v16f16, MVT::v16i16, MVT::v16f32, MVT::v16i32,
+      MVT::v32f32, MVT::v32i32, MVT::v2f64, MVT::v2i64, MVT::v3f64,
+      MVT::v3i64, MVT::v4f64, MVT::v4i64, MVT::v8f64, MVT::v8i64,
+      MVT::v16f64, MVT::v16i64},
      Custom);
 
   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
@@ -384,7 +412,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
                      MVT::i64, Custom);
 
   static const MVT::SimpleValueType VectorIntTypes[] = {
-      MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32};
+      MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32,
+      MVT::v9i32, MVT::v10i32, MVT::v11i32, MVT::v12i32};
 
   for (MVT VT : VectorIntTypes) {
     // Expand the following operations for the current type by default.
@@ -404,7 +433,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   }
 
   static const MVT::SimpleValueType FloatVectorTypes[] = {
-      MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32};
+      MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32,
+      MVT::v9f32, MVT::v10f32, MVT::v11f32, MVT::v12f32};
 
   for (MVT VT : FloatVectorTypes) {
     setOperationAction(
@@ -440,6 +470,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SELECT, MVT::v7f32, Promote);
   AddPromotedToType(ISD::SELECT, MVT::v7f32, MVT::v7i32);
 
+  setOperationAction(ISD::SELECT, MVT::v9f32, Promote);
+  AddPromotedToType(ISD::SELECT, MVT::v9f32, MVT::v9i32);
+
+  setOperationAction(ISD::SELECT, MVT::v10f32, Promote);
+  AddPromotedToType(ISD::SELECT, MVT::v10f32, MVT::v10i32);
+
+  setOperationAction(ISD::SELECT, MVT::v11f32, Promote);
+  AddPromotedToType(ISD::SELECT, MVT::v11f32, MVT::v11i32);
+
+  setOperationAction(ISD::SELECT, MVT::v12f32, Promote);
+  AddPromotedToType(ISD::SELECT, MVT::v12f32, MVT::v12i32);
+
   // There are no libcalls of any kind.
   for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
     setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
@@ -1064,7 +1106,9 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
     // Round up vec3/vec5 argument.
     if (MemVT.isVector() && !MemVT.isPow2VectorType()) {
       assert(MemVT.getVectorNumElements() == 3 ||
-             MemVT.getVectorNumElements() == 5);
+             MemVT.getVectorNumElements() == 5 ||
+             (MemVT.getVectorNumElements() >= 9 &&
+              MemVT.getVectorNumElements() <= 12));
       MemVT = MemVT.getPow2VectorType(State.getContext());
     } else if (!MemVT.isSimple() && !MemVT.isVector()) {
       MemVT = MemVT.getRoundIntegerType(State.getContext());
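The analyzeFormalArgumentsCompute change above admits 9–12 element vectors into the round-up path. A minimal sketch of what getPow2VectorType does to those element counts (an illustrative reimplementation, not LLVM's):

```cpp
#include <cassert>
#include <cstdio>

// Round a vector element count up to the next power of two, as
// MemVT.getPow2VectorType() does for kernel argument lowering.
static unsigned roundElemsToPow2(unsigned NumElems) {
  unsigned Pow2 = 1;
  while (Pow2 < NumElems)
    Pow2 *= 2;
  return Pow2;
}

int main() {
  // vec3 -> vec4 and vec5 -> vec8 (the cases the assert allowed before),
  // and now vec9..vec12 -> vec16.
  for (unsigned N : {3u, 5u, 9u, 10u, 11u, 12u})
    std::printf("v%u rounds up to v%u\n", N, roundElemsToPow2(N));
  assert(roundElemsToPow2(12) == 16);
}
```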
@@ -5446,7 +5446,7 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
     Opcode = AMDGPU::getMIMGOpcode(
         BaseOpcodes[Is64][IsA16],
         IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default : AMDGPU::MIMGEncGfx10Default,
-        NumVDataDwords, PowerOf2Ceil(NumVAddrDwords));
+        NumVDataDwords, NumVAddrDwords);
   }
   assert(Opcode != -1);
 
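With 9–12 dword VGPR tuples available, the BVH intrinsic no longer pads its vaddr dword count to a power of two before the opcode lookup. A sketch of the before/after, assuming a local equivalent of llvm::PowerOf2Ceil:

```cpp
#include <cstdio>

// Local equivalent of llvm::PowerOf2Ceil for small values.
static unsigned powerOf2Ceil(unsigned N) {
  unsigned P = 1;
  while (P < N)
    P *= 2;
  return P;
}

int main() {
  // image_bvh_intersect_ray typically needs 8..12 vaddr dwords depending
  // on node_ptr size and a16; the old code padded 9..12 up to 16.
  for (unsigned NumVAddrDwords = 8; NumVAddrDwords <= 12; ++NumVAddrDwords)
    std::printf("vaddr=%u: old lookup used %u dwords, new uses %u\n",
                NumVAddrDwords, powerOf2Ceil(NumVAddrDwords), NumVAddrDwords);
}
```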
@@ -7,16 +7,16 @@
 //===----------------------------------------------------------------------===//
 
 def SGPRRegBank : RegisterBank<"SGPR",
-  [SReg_LO16, SReg_32, SReg_64, SReg_96, SReg_128, SReg_160, SReg_192, SReg_224, SReg_256, SReg_512, SReg_1024]
+  [SReg_LO16, SReg_32, SReg_64, SReg_96, SReg_128, SReg_160, SReg_192, SReg_224, SReg_256, SReg_288, SReg_320, SReg_352, SReg_384, SReg_512, SReg_1024]
 >;
 
 def VGPRRegBank : RegisterBank<"VGPR",
-  [VGPR_LO16, VGPR_HI16, VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_224, VReg_256, VReg_512, VReg_1024]
+  [VGPR_LO16, VGPR_HI16, VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_224, VReg_256, VReg_288, VReg_320, VReg_352, VReg_384, VReg_512, VReg_1024]
 >;
 
 // It is helpful to distinguish conditions from ordinary SGPRs.
 def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
 
 def AGPRRegBank : RegisterBank <"AGPR",
-  [AGPR_LO16, AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_224, AReg_256, AReg_512, AReg_1024]
+  [AGPR_LO16, AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_224, AReg_256, AReg_288, AReg_320, AReg_352, AReg_384, AReg_512, AReg_1024]
 >;
@@ -431,6 +431,46 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
       IsSGPR = false;
       IsAGPR = true;
       Width = 8;
+    } else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
+      IsSGPR = false;
+      Width = 9;
+    } else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
+      IsSGPR = true;
+      Width = 9;
+    } else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
+      IsSGPR = false;
+      IsAGPR = true;
+      Width = 9;
+    } else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
+      IsSGPR = false;
+      Width = 10;
+    } else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
+      IsSGPR = true;
+      Width = 10;
+    } else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
+      IsSGPR = false;
+      IsAGPR = true;
+      Width = 10;
+    } else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
+      IsSGPR = false;
+      Width = 11;
+    } else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
+      IsSGPR = true;
+      Width = 11;
+    } else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
+      IsSGPR = false;
+      IsAGPR = true;
+      Width = 11;
+    } else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
+      IsSGPR = false;
+      Width = 12;
+    } else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
+      IsSGPR = true;
+      Width = 12;
+    } else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
+      IsSGPR = false;
+      IsAGPR = true;
+      Width = 12;
     } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
       assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
              "trap handler registers should not be used");
@@ -2360,6 +2360,14 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
      return AMDGPU::VReg_224RegClassID;
    case 256:
      return AMDGPU::VReg_256RegClassID;
+   case 288:
+     return AMDGPU::VReg_288RegClassID;
+   case 320:
+     return AMDGPU::VReg_320RegClassID;
+   case 352:
+     return AMDGPU::VReg_352RegClassID;
+   case 384:
+     return AMDGPU::VReg_384RegClassID;
    case 512:
      return AMDGPU::VReg_512RegClassID;
    case 1024:
@@ -2398,6 +2406,14 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
      return AMDGPU::SGPR_224RegClassID;
    case 256:
      return AMDGPU::SGPR_256RegClassID;
+   case 288:
+     return AMDGPU::SGPR_288RegClassID;
+   case 320:
+     return AMDGPU::SGPR_320RegClassID;
+   case 352:
+     return AMDGPU::SGPR_352RegClassID;
+   case 384:
+     return AMDGPU::SGPR_384RegClassID;
    case 512:
      return AMDGPU::SGPR_512RegClassID;
    }
@@ -2420,6 +2436,14 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
      return AMDGPU::AReg_224RegClassID;
    case 256:
      return AMDGPU::AReg_256RegClassID;
+   case 288:
+     return AMDGPU::AReg_288RegClassID;
+   case 320:
+     return AMDGPU::AReg_320RegClassID;
+   case 352:
+     return AMDGPU::AReg_352RegClassID;
+   case 384:
+     return AMDGPU::AReg_384RegClassID;
    case 512:
      return AMDGPU::AReg_512RegClassID;
    case 1024:
@@ -3684,7 +3708,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
 
   if (!IsNSA) {
-    if (ExpectedAddrSize > 8)
+    if (ExpectedAddrSize > 12)
       ExpectedAddrSize = 16;
 
     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
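The `> 8` to `> 12` threshold change above recurs in the disassembler and the instruction verifier below: non-NSA vaddr sizes of 9–12 dwords are now representable directly, and only larger sizes round up to 16. A hedged sketch of that rule:

```cpp
#include <cassert>

// Round a non-NSA MIMG vaddr size (in dwords) to an available VGPR
// tuple. Old rule: anything above 8 rounded to 16; new rule: 9..12
// dword tuples are usable directly, so only sizes above 12 round up.
static unsigned roundNonNSAAddrSize(unsigned AddrSize, bool HasNewClasses) {
  unsigned Threshold = HasNewClasses ? 12 : 8;
  return AddrSize > Threshold ? 16 : AddrSize;
}

int main() {
  assert(roundNonNSAAddrSize(9, /*HasNewClasses=*/false) == 16);
  assert(roundNonNSAAddrSize(9, /*HasNewClasses=*/true) == 9);
  assert(roundNonNSAAddrSize(13, /*HasNewClasses=*/true) == 16);
}
```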
@@ -129,6 +129,9 @@ DECODE_OPERAND_REG(VReg_64)
 DECODE_OPERAND_REG(VReg_96)
 DECODE_OPERAND_REG(VReg_128)
 DECODE_OPERAND_REG(VReg_256)
+DECODE_OPERAND_REG(VReg_288)
+DECODE_OPERAND_REG(VReg_352)
+DECODE_OPERAND_REG(VReg_384)
 DECODE_OPERAND_REG(VReg_512)
 DECODE_OPERAND_REG(VReg_1024)
 
@@ -919,7 +922,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
   IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
           Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;
   if (!IsNSA) {
-    if (AddrSize > 8)
+    if (AddrSize > 12)
       AddrSize = 16;
   } else {
     if (AddrSize > Info->VAddrDwords) {
@@ -1129,6 +1132,14 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
   case AMDGPU::TTMP_256RegClassID:
     // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
     // this bundle?
+  case AMDGPU::SGPR_288RegClassID:
+  case AMDGPU::TTMP_288RegClassID:
+  case AMDGPU::SGPR_320RegClassID:
+  case AMDGPU::TTMP_320RegClassID:
+  case AMDGPU::SGPR_352RegClassID:
+  case AMDGPU::TTMP_352RegClassID:
+  case AMDGPU::SGPR_384RegClassID:
+  case AMDGPU::TTMP_384RegClassID:
   case AMDGPU::SGPR_512RegClassID:
   case AMDGPU::TTMP_512RegClassID:
     shift = 2;
@@ -1204,6 +1215,23 @@ MCOperand AMDGPUDisassembler::decodeOperand_AReg_256(unsigned Val) const {
   return createRegOperand(AMDGPU::AReg_256RegClassID, Val & 255);
 }
 
+MCOperand AMDGPUDisassembler::decodeOperand_AReg_288(unsigned Val) const {
+  return createRegOperand(AMDGPU::AReg_288RegClassID, Val & 255);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AReg_320(unsigned Val) const {
+  return createRegOperand(AMDGPU::AReg_320RegClassID, Val & 255);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AReg_352(unsigned Val) const {
+  return createRegOperand(AMDGPU::AReg_352RegClassID, Val & 255);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AReg_384(unsigned Val) const {
+  return createRegOperand(AMDGPU::AReg_384RegClassID, Val & 255);
+}
+
+
 MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {
   return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);
 }
@@ -1252,6 +1280,22 @@ MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const {
   return createRegOperand(AMDGPU::VReg_256RegClassID, Val);
 }
 
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_288(unsigned Val) const {
+  return createRegOperand(AMDGPU::VReg_288RegClassID, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_320(unsigned Val) const {
+  return createRegOperand(AMDGPU::VReg_320RegClassID, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_352(unsigned Val) const {
+  return createRegOperand(AMDGPU::VReg_352RegClassID, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_384(unsigned Val) const {
+  return createRegOperand(AMDGPU::VReg_384RegClassID, Val);
+}
+
 MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {
   return createRegOperand(AMDGPU::VReg_512RegClassID, Val);
 }
@@ -1302,6 +1346,22 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
   return decodeDstOp(OPW256, Val);
 }
 
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_288(unsigned Val) const {
+  return decodeDstOp(OPW288, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_320(unsigned Val) const {
+  return decodeDstOp(OPW320, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_352(unsigned Val) const {
+  return decodeDstOp(OPW352, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_384(unsigned Val) const {
+  return decodeDstOp(OPW384, Val);
+}
+
 MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
   return decodeDstOp(OPW512, Val);
 }
@@ -1460,6 +1520,10 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
   case OPW128: return VReg_128RegClassID;
   case OPW160: return VReg_160RegClassID;
   case OPW256: return VReg_256RegClassID;
+  case OPW288: return VReg_288RegClassID;
+  case OPW320: return VReg_320RegClassID;
+  case OPW352: return VReg_352RegClassID;
+  case OPW384: return VReg_384RegClassID;
   case OPW512: return VReg_512RegClassID;
   case OPW1024: return VReg_1024RegClassID;
   }
@@ -1481,6 +1545,10 @@ unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
   case OPW128: return AReg_128RegClassID;
   case OPW160: return AReg_160RegClassID;
   case OPW256: return AReg_256RegClassID;
+  case OPW288: return AReg_288RegClassID;
+  case OPW320: return AReg_320RegClassID;
+  case OPW352: return AReg_352RegClassID;
+  case OPW384: return AReg_384RegClassID;
   case OPW512: return AReg_512RegClassID;
   case OPW1024: return AReg_1024RegClassID;
   }
@@ -1503,6 +1571,10 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
   case OPW128: return SGPR_128RegClassID;
   case OPW160: return SGPR_160RegClassID;
   case OPW256: return SGPR_256RegClassID;
+  case OPW288: return SGPR_288RegClassID;
+  case OPW320: return SGPR_320RegClassID;
+  case OPW352: return SGPR_352RegClassID;
+  case OPW384: return SGPR_384RegClassID;
   case OPW512: return SGPR_512RegClassID;
   }
 }
@@ -1521,6 +1593,10 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
   case OPWV232: return TTMP_64RegClassID;
   case OPW128: return TTMP_128RegClassID;
   case OPW256: return TTMP_256RegClassID;
+  case OPW288: return TTMP_288RegClassID;
+  case OPW320: return TTMP_320RegClassID;
+  case OPW352: return TTMP_352RegClassID;
+  case OPW384: return TTMP_384RegClassID;
   case OPW512: return TTMP_512RegClassID;
   }
 }
@@ -182,6 +182,10 @@ public:
   MCOperand decodeOperand_VReg_96(unsigned Val) const;
   MCOperand decodeOperand_VReg_128(unsigned Val) const;
   MCOperand decodeOperand_VReg_256(unsigned Val) const;
+  MCOperand decodeOperand_VReg_288(unsigned Val) const;
+  MCOperand decodeOperand_VReg_320(unsigned Val) const;
+  MCOperand decodeOperand_VReg_352(unsigned Val) const;
+  MCOperand decodeOperand_VReg_384(unsigned Val) const;
   MCOperand decodeOperand_VReg_512(unsigned Val) const;
   MCOperand decodeOperand_VReg_1024(unsigned Val) const;
 
@@ -193,12 +197,20 @@ public:
   MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const;
   MCOperand decodeOperand_SReg_128(unsigned Val) const;
   MCOperand decodeOperand_SReg_256(unsigned Val) const;
+  MCOperand decodeOperand_SReg_288(unsigned Val) const;
+  MCOperand decodeOperand_SReg_320(unsigned Val) const;
+  MCOperand decodeOperand_SReg_352(unsigned Val) const;
+  MCOperand decodeOperand_SReg_384(unsigned Val) const;
   MCOperand decodeOperand_SReg_512(unsigned Val) const;
 
   MCOperand decodeOperand_AGPR_32(unsigned Val) const;
   MCOperand decodeOperand_AReg_64(unsigned Val) const;
   MCOperand decodeOperand_AReg_128(unsigned Val) const;
   MCOperand decodeOperand_AReg_256(unsigned Val) const;
+  MCOperand decodeOperand_AReg_288(unsigned Val) const;
+  MCOperand decodeOperand_AReg_320(unsigned Val) const;
+  MCOperand decodeOperand_AReg_352(unsigned Val) const;
+  MCOperand decodeOperand_AReg_384(unsigned Val) const;
   MCOperand decodeOperand_AReg_512(unsigned Val) const;
   MCOperand decodeOperand_AReg_1024(unsigned Val) const;
   MCOperand decodeOperand_AV_32(unsigned Val) const;
@@ -214,6 +226,10 @@ public:
     OPW128,
     OPW160,
     OPW256,
+    OPW288,
+    OPW320,
+    OPW352,
+    OPW384,
     OPW512,
     OPW1024,
     OPW16,
@@ -503,6 +503,10 @@ void SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
       MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
       MRI.getRegClass(AMDGPU::AReg_224RegClassID).contains(Reg) ||
       MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) ||
+      MRI.getRegClass(AMDGPU::AReg_288RegClassID).contains(Reg) ||
+      MRI.getRegClass(AMDGPU::AReg_320RegClassID).contains(Reg) ||
+      MRI.getRegClass(AMDGPU::AReg_352RegClassID).contains(Reg) ||
+      MRI.getRegClass(AMDGPU::AReg_384RegClassID).contains(Reg) ||
       MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(Reg) ||
       MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
     Enc |= 512;
@@ -958,7 +958,11 @@ class MIMGAddrSize<int dw, bit enable_disasm> {
               !if(!eq(NumWords, 6), VReg_192,
               !if(!eq(NumWords, 7), VReg_224,
               !if(!le(NumWords, 8), VReg_256,
-              !if(!le(NumWords, 16), VReg_512, ?))))))))));
+              !if(!le(NumWords, 9), VReg_288,
+              !if(!le(NumWords, 10), VReg_320,
+              !if(!le(NumWords, 11), VReg_352,
+              !if(!le(NumWords, 12), VReg_384,
+              !if(!le(NumWords, 16), VReg_512, ?))))))))))))));
 
   // Whether the instruction variant with this vaddr size should be enabled for
   // the auto-generated disassembler.
@@ -1007,8 +1011,8 @@ class MIMG_Sampler_AddrSizes<AMDGPUSampleVariant sample, bit isG16> {
     !foreach(range,
              // V4 is generated for V3 and V4
              // V8 is generated for V5 through V8
-             // V16 is generated for V9 through V16
-             [[1],[2],[3],[3,4],[5],[6],[7],[5,8],[9,16]],
+             // V16 is generated for V13 through V16
+             [[1],[2],[3],[3,4],[5],[6],[7],[5,8],[9],[10],[11],[12],[13,16]],
              MIMGAddrSizes_dw_range<range>),
     lhs, dw,
     !if(isRangeInList<dw.Min, dw.Max, AllNumAddrWords>.ret,
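A sketch of the word-count to register-class mapping the extended !if chain above expresses; entries for fewer than 6 words exist in the full class but are not shown in the hunk, so they are omitted here:

```cpp
#include <cstdio>

// Mirrors the visible tail of the MIMGAddrSize !if chain: exact tuples
// through 12 dwords, then VReg_512 as the catch-all up to 16.
static const char *vaddrClassForWords(unsigned NumWords) {
  if (NumWords < 6)
    return "(smaller tuple, not shown in the hunk)";
  if (NumWords == 6)  return "VReg_192";
  if (NumWords == 7)  return "VReg_224";
  if (NumWords <= 8)  return "VReg_256";
  if (NumWords <= 9)  return "VReg_288";
  if (NumWords <= 10) return "VReg_320";
  if (NumWords <= 11) return "VReg_352";
  if (NumWords <= 12) return "VReg_384";
  if (NumWords <= 16) return "VReg_512";
  return "?";
}

int main() {
  for (unsigned W = 8; W <= 13; ++W)
    std::printf("%u words -> %s\n", W, vaddrClassForWords(W));
}
```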
@@ -120,6 +120,18 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
   addRegisterClass(MVT::v4f64, TRI->getVGPRClassForBitWidth(256));
 
+  addRegisterClass(MVT::v9i32, &AMDGPU::SGPR_288RegClass);
+  addRegisterClass(MVT::v9f32, TRI->getVGPRClassForBitWidth(288));
+
+  addRegisterClass(MVT::v10i32, &AMDGPU::SGPR_320RegClass);
+  addRegisterClass(MVT::v10f32, TRI->getVGPRClassForBitWidth(320));
+
+  addRegisterClass(MVT::v11i32, &AMDGPU::SGPR_352RegClass);
+  addRegisterClass(MVT::v11f32, TRI->getVGPRClassForBitWidth(352));
+
+  addRegisterClass(MVT::v12i32, &AMDGPU::SGPR_384RegClass);
+  addRegisterClass(MVT::v12f32, TRI->getVGPRClassForBitWidth(384));
+
   addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
   addRegisterClass(MVT::v16f32, TRI->getVGPRClassForBitWidth(512));
 
@@ -158,15 +170,17 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
   // We need to custom lower vector stores from local memory
   setOperationAction(ISD::LOAD,
-                     {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
-                      MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,
-                      MVT::v32i32},
+                     {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
+                      MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
+                      MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32,
+                      MVT::i1, MVT::v32i32},
                      Custom);
 
   setOperationAction(ISD::STORE,
-                     {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
-                      MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,
-                      MVT::v32i32},
+                     {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
+                      MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
+                      MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32,
+                      MVT::i1, MVT::v32i32},
                      Custom);
 
   setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
@@ -209,12 +223,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
 
   setOperationAction(ISD::TRUNCATE,
-                     {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
-                      MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32},
+                     {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
+                      MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
+                      MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32},
                      Expand);
   setOperationAction(ISD::FP_ROUND,
-                     {MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
-                      MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},
+                     {MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
+                      MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v9f32,
+                      MVT::v10f32, MVT::v11f32, MVT::v12f32, MVT::v16f32},
                      Expand);
 
   setOperationAction(ISD::SIGN_EXTEND_INREG,
@@ -240,11 +256,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   // We only support LOAD/STORE and vector manipulation ops for vectors
   // with > 4 elements.
   for (MVT VT :
-       {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64,
-        MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v3i64, MVT::v3f64,
-        MVT::v6i32, MVT::v6f32, MVT::v4i64, MVT::v4f64, MVT::v8i64,
-        MVT::v8f64, MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16,
-        MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32}) {
+       {MVT::v8i32, MVT::v8f32, MVT::v9i32, MVT::v9f32, MVT::v10i32,
+        MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32,
+        MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64, MVT::v4i16,
+        MVT::v4f16, MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32,
+        MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64, MVT::v8i16,
+        MVT::v8f16, MVT::v16i16, MVT::v16f16, MVT::v16i64, MVT::v16f64,
+        MVT::v32i32, MVT::v32f32}) {
     for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
       switch (Op) {
       case ISD::LOAD:
@@ -365,8 +383,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
   // Deal with vec5/6/7 vector operations when widened to vec8.
   setOperationAction(ISD::INSERT_SUBVECTOR,
-                     {MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
-                      MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32},
+                     {MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
+                      MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32,
+                      MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32,
+                      MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},
                      Custom);
 
   // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
@@ -4235,6 +4255,10 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
   case AMDGPU::SI_INDIRECT_SRC_V2:
   case AMDGPU::SI_INDIRECT_SRC_V4:
   case AMDGPU::SI_INDIRECT_SRC_V8:
+  case AMDGPU::SI_INDIRECT_SRC_V9:
+  case AMDGPU::SI_INDIRECT_SRC_V10:
+  case AMDGPU::SI_INDIRECT_SRC_V11:
+  case AMDGPU::SI_INDIRECT_SRC_V12:
   case AMDGPU::SI_INDIRECT_SRC_V16:
   case AMDGPU::SI_INDIRECT_SRC_V32:
     return emitIndirectSrc(MI, *BB, *getSubtarget());
@@ -4242,6 +4266,10 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
   case AMDGPU::SI_INDIRECT_DST_V2:
   case AMDGPU::SI_INDIRECT_DST_V4:
   case AMDGPU::SI_INDIRECT_DST_V8:
+  case AMDGPU::SI_INDIRECT_DST_V9:
+  case AMDGPU::SI_INDIRECT_DST_V10:
+  case AMDGPU::SI_INDIRECT_DST_V11:
+  case AMDGPU::SI_INDIRECT_DST_V12:
   case AMDGPU::SI_INDIRECT_DST_V16:
   case AMDGPU::SI_INDIRECT_DST_V32:
     return emitIndirectDst(MI, *BB, *getSubtarget());
@@ -6185,7 +6213,7 @@ static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
   MVT Type;
   unsigned NumElts = Elts.size();
 
-  if (NumElts <= 8) {
+  if (NumElts <= 12) {
     Type = MVT::getVectorVT(MVT::f32, NumElts);
   } else {
     assert(Elts.size() <= 16);
@@ -7735,7 +7763,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
         AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
                               IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default
                                           : AMDGPU::MIMGEncGfx10Default,
-                              NumVDataDwords, PowerOf2Ceil(NumVAddrDwords));
+                              NumVDataDwords, NumVAddrDwords);
   }
   assert(Opcode != -1);
 
@@ -7801,13 +7829,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
 
     if (!UseNSA) {
       // Build a single vector containing all the operands so far prepared.
-      if (NumVAddrDwords > 8) {
+      if (NumVAddrDwords > 12) {
        SDValue Undef = DAG.getUNDEF(MVT::i32);
        Ops.append(16 - Ops.size(), Undef);
       }
-      assert(Ops.size() == 8 || Ops.size() == 16);
+      assert(Ops.size() >= 8 && Ops.size() <= 12);
       SDValue MergedOps = DAG.getBuildVector(
-          Ops.size() == 16 ? MVT::v16i32 : MVT::v8i32, DL, Ops);
+          MVT::getVectorVT(MVT::i32, Ops.size()), DL, Ops);
       Ops.clear();
       Ops.push_back(MergedOps);
     }
@@ -12466,6 +12494,14 @@ static int getAlignedAGPRClassID(unsigned UnalignedClassID) {
     return AMDGPU::VReg_224_Align2RegClassID;
   case AMDGPU::VReg_256RegClassID:
     return AMDGPU::VReg_256_Align2RegClassID;
+  case AMDGPU::VReg_288RegClassID:
+    return AMDGPU::VReg_288_Align2RegClassID;
+  case AMDGPU::VReg_320RegClassID:
+    return AMDGPU::VReg_320_Align2RegClassID;
+  case AMDGPU::VReg_352RegClassID:
+    return AMDGPU::VReg_352_Align2RegClassID;
+  case AMDGPU::VReg_384RegClassID:
+    return AMDGPU::VReg_384_Align2RegClassID;
  case AMDGPU::VReg_512RegClassID:
    return AMDGPU::VReg_512_Align2RegClassID;
  case AMDGPU::VReg_1024RegClassID:
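getBuildDwordsVector (changed above) can now keep 9–12 element vectors exact instead of widening to v16f32. A small sketch under that reading:

```cpp
#include <cassert>

// Element count getBuildDwordsVector picks for the merged dword vector:
// exact when v2f32..v12f32 covers it (the new classes make 9-12 legal),
// otherwise pad to v16f32.
static unsigned buildDwordsVectorWidth(unsigned NumElts) {
  if (NumElts <= 12)
    return NumElts; // MVT::getVectorVT(MVT::f32, NumElts)
  assert(NumElts <= 16);
  return 16;        // MVT::v16f32
}

int main() {
  assert(buildDwordsVectorWidth(9) == 9);   // widened to 16 before this patch
  assert(buildDwordsVectorWidth(12) == 12); // widened to 16 before this patch
  assert(buildDwordsVectorWidth(13) == 16);
}
```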
@@ -1300,6 +1300,14 @@ SIInstrInfo::getIndirectGPRIDXPseudo(unsigned VecSize,
     return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
   if (VecSize <= 256) // 32 bytes
     return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
+  if (VecSize <= 288) // 36 bytes
+    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
+  if (VecSize <= 320) // 40 bytes
+    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
+  if (VecSize <= 352) // 44 bytes
+    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
+  if (VecSize <= 384) // 48 bytes
+    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
   if (VecSize <= 512) // 64 bytes
     return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
   if (VecSize <= 1024) // 128 bytes
@@ -1320,6 +1328,14 @@ SIInstrInfo::getIndirectGPRIDXPseudo(unsigned VecSize,
     return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
   if (VecSize <= 256) // 32 bytes
     return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
+  if (VecSize <= 288) // 36 bytes
+    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
+  if (VecSize <= 320) // 40 bytes
+    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
+  if (VecSize <= 352) // 44 bytes
+    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
+  if (VecSize <= 384) // 48 bytes
+    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
   if (VecSize <= 512) // 64 bytes
     return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
   if (VecSize <= 1024) // 128 bytes
@@ -1341,6 +1357,14 @@ static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize) {
     return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
   if (VecSize <= 256) // 32 bytes
     return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
+  if (VecSize <= 288) // 36 bytes
+    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
+  if (VecSize <= 320) // 40 bytes
+    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
+  if (VecSize <= 352) // 44 bytes
+    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
+  if (VecSize <= 384) // 48 bytes
+    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
   if (VecSize <= 512) // 64 bytes
     return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
   if (VecSize <= 1024) // 128 bytes
@@ -1421,6 +1445,14 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_S224_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_S256_SAVE;
+  case 36:
+    return AMDGPU::SI_SPILL_S288_SAVE;
+  case 40:
+    return AMDGPU::SI_SPILL_S320_SAVE;
+  case 44:
+    return AMDGPU::SI_SPILL_S352_SAVE;
+  case 48:
+    return AMDGPU::SI_SPILL_S384_SAVE;
   case 64:
     return AMDGPU::SI_SPILL_S512_SAVE;
   case 128:
@@ -1448,6 +1480,14 @@ static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_V224_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_V256_SAVE;
+  case 36:
+    return AMDGPU::SI_SPILL_V288_SAVE;
+  case 40:
+    return AMDGPU::SI_SPILL_V320_SAVE;
+  case 44:
+    return AMDGPU::SI_SPILL_V352_SAVE;
+  case 48:
+    return AMDGPU::SI_SPILL_V384_SAVE;
   case 64:
     return AMDGPU::SI_SPILL_V512_SAVE;
   case 128:
@@ -1588,6 +1628,14 @@ static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_S224_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_S256_RESTORE;
+  case 36:
+    return AMDGPU::SI_SPILL_S288_RESTORE;
+  case 40:
+    return AMDGPU::SI_SPILL_S320_RESTORE;
+  case 44:
+    return AMDGPU::SI_SPILL_S352_RESTORE;
+  case 48:
+    return AMDGPU::SI_SPILL_S384_RESTORE;
   case 64:
     return AMDGPU::SI_SPILL_S512_RESTORE;
   case 128:
@@ -1615,6 +1663,14 @@ static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_V224_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_V256_RESTORE;
+  case 36:
+    return AMDGPU::SI_SPILL_V288_RESTORE;
+  case 40:
+    return AMDGPU::SI_SPILL_V320_RESTORE;
+  case 44:
+    return AMDGPU::SI_SPILL_V352_RESTORE;
+  case 48:
+    return AMDGPU::SI_SPILL_V384_RESTORE;
   case 64:
     return AMDGPU::SI_SPILL_V512_RESTORE;
   case 128:
@@ -1642,6 +1698,14 @@ static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_A224_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_A256_RESTORE;
+  case 36:
+    return AMDGPU::SI_SPILL_A288_RESTORE;
+  case 40:
+    return AMDGPU::SI_SPILL_A320_RESTORE;
+  case 44:
+    return AMDGPU::SI_SPILL_A352_RESTORE;
+  case 48:
+    return AMDGPU::SI_SPILL_A384_RESTORE;
   case 64:
     return AMDGPU::SI_SPILL_A512_RESTORE;
   case 128:
@@ -1669,6 +1733,14 @@ static unsigned getAVSpillRestoreOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_AV224_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_AV256_RESTORE;
+  case 36:
+    return AMDGPU::SI_SPILL_AV288_RESTORE;
+  case 40:
+    return AMDGPU::SI_SPILL_AV320_RESTORE;
+  case 44:
+    return AMDGPU::SI_SPILL_AV352_RESTORE;
+  case 48:
+    return AMDGPU::SI_SPILL_AV384_RESTORE;
   case 64:
     return AMDGPU::SI_SPILL_AV512_RESTORE;
   case 128:
@@ -1974,6 +2046,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
   case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
   case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
+  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
+  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
+  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
+  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
   case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
   case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
   case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
@@ -2025,6 +2101,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
   case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
   case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
+  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
+  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
+  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
+  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
   case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
   case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
     assert(ST.useVGPRIndexMode());
@@ -2064,6 +2144,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
   case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
   case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
+  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
+  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
+  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
+  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
   case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
   case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
     assert(ST.useVGPRIndexMode());
@@ -4531,7 +4615,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     } else {
       const TargetRegisterClass *RC = getOpRegClass(MI, VAddr0Idx);
       VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32;
-      if (AddrWords > 8)
+      if (AddrWords > 12)
         AddrWords = 16;
     }
 
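The spill switches above select opcodes by spill size in bytes; the new cases 36/40/44/48 correspond to the 288/320/352/384-bit tuples. A one-liner sketch of that correspondence:

```cpp
#include <cstdio>

// The SI_SPILL_{S,V,A,AV}<N>_SAVE/RESTORE opcodes are keyed by spill
// size in bytes; the suffix <N> is the tuple width in bits (bytes * 8).
int main() {
  for (unsigned Bytes : {32u, 36u, 40u, 44u, 48u, 64u})
    std::printf("%u-byte spill -> SI_SPILL_*%u_*\n", Bytes, Bytes * 8);
}
```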
@@ -650,6 +650,10 @@ def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;
 def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;
 def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;
 def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC<VReg_256>;
+def SI_INDIRECT_SRC_V9 : SI_INDIRECT_SRC<VReg_288>;
+def SI_INDIRECT_SRC_V10 : SI_INDIRECT_SRC<VReg_320>;
+def SI_INDIRECT_SRC_V11 : SI_INDIRECT_SRC<VReg_352>;
+def SI_INDIRECT_SRC_V12 : SI_INDIRECT_SRC<VReg_384>;
 def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC<VReg_512>;
 def SI_INDIRECT_SRC_V32 : SI_INDIRECT_SRC<VReg_1024>;
 
@@ -657,6 +661,10 @@ def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
 def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
 def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
 def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
+def SI_INDIRECT_DST_V9 : SI_INDIRECT_DST<VReg_288>;
+def SI_INDIRECT_DST_V10 : SI_INDIRECT_DST<VReg_320>;
+def SI_INDIRECT_DST_V11 : SI_INDIRECT_DST<VReg_352>;
+def SI_INDIRECT_DST_V12 : SI_INDIRECT_DST<VReg_384>;
 def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
 def SI_INDIRECT_DST_V32 : SI_INDIRECT_DST<VReg_1024>;
 
@@ -698,6 +706,10 @@ def V_INDIRECT_REG_WRITE_MOVREL_B32_V3 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<
 def V_INDIRECT_REG_WRITE_MOVREL_B32_V4 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_128>;
 def V_INDIRECT_REG_WRITE_MOVREL_B32_V5 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_160>;
 def V_INDIRECT_REG_WRITE_MOVREL_B32_V8 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_256>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V9 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_288>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V10 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_320>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V11 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_352>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V12 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_384>;
 def V_INDIRECT_REG_WRITE_MOVREL_B32_V16 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_512>;
 def V_INDIRECT_REG_WRITE_MOVREL_B32_V32 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_1024>;
 
@@ -735,6 +747,10 @@ def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VR
 def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_128>;
 def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_160>;
 def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_256>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_288>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_320>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_352>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_384>;
 def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_512>;
 def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_1024>;
 
@@ -751,6 +767,10 @@ def V_INDIRECT_REG_READ_GPR_IDX_B32_V3 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg
 def V_INDIRECT_REG_READ_GPR_IDX_B32_V4 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_128>;
 def V_INDIRECT_REG_READ_GPR_IDX_B32_V5 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_160>;
 def V_INDIRECT_REG_READ_GPR_IDX_B32_V8 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_256>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V9 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_288>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V10 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_320>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V11 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_352>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V12 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_384>;
 def V_INDIRECT_REG_READ_GPR_IDX_B32_V16 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_512>;
 def V_INDIRECT_REG_READ_GPR_IDX_B32_V32 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_1024>;
 
@@ -784,6 +804,10 @@ defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
 defm SI_SPILL_S192 : SI_SPILL_SGPR <SReg_192>;
 defm SI_SPILL_S224 : SI_SPILL_SGPR <SReg_224>;
 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
+defm SI_SPILL_S288 : SI_SPILL_SGPR <SReg_288>;
+defm SI_SPILL_S320 : SI_SPILL_SGPR <SReg_320>;
+defm SI_SPILL_S352 : SI_SPILL_SGPR <SReg_352>;
+defm SI_SPILL_S384 : SI_SPILL_SGPR <SReg_384>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
 defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
 
@@ -828,6 +852,10 @@ defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
 defm SI_SPILL_V192 : SI_SPILL_VGPR <VReg_192>;
 defm SI_SPILL_V224 : SI_SPILL_VGPR <VReg_224>;
 defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
+defm SI_SPILL_V288 : SI_SPILL_VGPR <VReg_288>;
+defm SI_SPILL_V320 : SI_SPILL_VGPR <VReg_320>;
+defm SI_SPILL_V352 : SI_SPILL_VGPR <VReg_352>;
+defm SI_SPILL_V384 : SI_SPILL_VGPR <VReg_384>;
 defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
 defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>;
 
@@ -839,6 +867,10 @@ defm SI_SPILL_A160 : SI_SPILL_VGPR <AReg_160, 1>;
 defm SI_SPILL_A192 : SI_SPILL_VGPR <AReg_192, 1>;
 defm SI_SPILL_A224 : SI_SPILL_VGPR <AReg_224, 1>;
 defm SI_SPILL_A256 : SI_SPILL_VGPR <AReg_256, 1>;
+defm SI_SPILL_A288 : SI_SPILL_VGPR <AReg_288, 1>;
+defm SI_SPILL_A320 : SI_SPILL_VGPR <AReg_320, 1>;
+defm SI_SPILL_A352 : SI_SPILL_VGPR <AReg_352, 1>;
+defm SI_SPILL_A384 : SI_SPILL_VGPR <AReg_384, 1>;
 defm SI_SPILL_A512 : SI_SPILL_VGPR <AReg_512, 1>;
 defm SI_SPILL_A1024 : SI_SPILL_VGPR <AReg_1024, 1>;
 
@@ -850,6 +882,10 @@ defm SI_SPILL_AV160 : SI_SPILL_VGPR <AV_160, 1>;
 defm SI_SPILL_AV192 : SI_SPILL_VGPR <AV_192, 1>;
 defm SI_SPILL_AV224 : SI_SPILL_VGPR <AV_224, 1>;
 defm SI_SPILL_AV256 : SI_SPILL_VGPR <AV_256, 1>;
+defm SI_SPILL_AV288 : SI_SPILL_VGPR <AV_288, 1>;
+defm SI_SPILL_AV320 : SI_SPILL_VGPR <AV_320, 1>;
+defm SI_SPILL_AV352 : SI_SPILL_VGPR <AV_352, 1>;
+defm SI_SPILL_AV384 : SI_SPILL_VGPR <AV_384, 1>;
 defm SI_SPILL_AV512 : SI_SPILL_VGPR <AV_512, 1>;
 defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>;
 
@@ -1225,6 +1261,70 @@ foreach Index = 0-7 in {
   >;
 }
 
+foreach Index = 0-8 in {
+  def Extract_Element_v9i32_#Index : Extract_Element <
+    i32, v9i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v9i32_#Index : Insert_Element <
+    i32, v9i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Extract_Element_v9f32_#Index : Extract_Element <
+    f32, v9f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v9f32_#Index : Insert_Element <
+    f32, v9f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+}
+
+foreach Index = 0-9 in {
+  def Extract_Element_v10i32_#Index : Extract_Element <
+    i32, v10i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v10i32_#Index : Insert_Element <
+    i32, v10i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Extract_Element_v10f32_#Index : Extract_Element <
+    f32, v10f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v10f32_#Index : Insert_Element <
+    f32, v10f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+}
+
+foreach Index = 0-10 in {
+  def Extract_Element_v11i32_#Index : Extract_Element <
+    i32, v11i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v11i32_#Index : Insert_Element <
+    i32, v11i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Extract_Element_v11f32_#Index : Extract_Element <
+    f32, v11f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v11f32_#Index : Insert_Element <
+    f32, v11f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+}
+
+foreach Index = 0-11 in {
+  def Extract_Element_v12i32_#Index : Extract_Element <
+    i32, v12i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v12i32_#Index : Insert_Element <
+    i32, v12i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Extract_Element_v12f32_#Index : Extract_Element <
+    f32, v12f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v12f32_#Index : Insert_Element <
+    f32, v12f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+}
+
 foreach Index = 0-15 in {
   def Extract_Element_v16i32_#Index : Extract_Element <
     i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -1482,6 +1582,30 @@ def : BitConvert <v4i64, v16i16, VReg_256>;
 def : BitConvert <v4f64, v16f16, VReg_256>;
 def : BitConvert <v4f64, v16i16, VReg_256>;
 
+// 288-bit bitcast
+def : BitConvert <v9i32, v9f32, SReg_288>;
+def : BitConvert <v9f32, v9i32, SReg_288>;
+def : BitConvert <v9i32, v9f32, VReg_288>;
+def : BitConvert <v9f32, v9i32, VReg_288>;
+
+// 320-bit bitcast
+def : BitConvert <v10i32, v10f32, SReg_320>;
+def : BitConvert <v10f32, v10i32, SReg_320>;
+def : BitConvert <v10i32, v10f32, VReg_320>;
+def : BitConvert <v10f32, v10i32, VReg_320>;
+
+// 352-bit bitcast
+def : BitConvert <v11i32, v11f32, SReg_352>;
+def : BitConvert <v11f32, v11i32, SReg_352>;
+def : BitConvert <v11i32, v11f32, VReg_352>;
+def : BitConvert <v11f32, v11i32, VReg_352>;
+
+// 384-bit bitcast
+def : BitConvert <v12i32, v12f32, SReg_384>;
+def : BitConvert <v12f32, v12i32, SReg_384>;
+def : BitConvert <v12i32, v12f32, VReg_384>;
+def : BitConvert <v12f32, v12i32, VReg_384>;
+
 // 512-bit bitcast
 def : BitConvert <v16i32, v16f32, VReg_512>;
 def : BitConvert <v16f32, v16i32, VReg_512>;
@@ -2022,12 +2146,20 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, string VecSize> {
 defm : SI_INDIRECT_Pattern <v2f32, f32, "V2">;
 defm : SI_INDIRECT_Pattern <v4f32, f32, "V4">;
 defm : SI_INDIRECT_Pattern <v8f32, f32, "V8">;
+defm : SI_INDIRECT_Pattern <v9f32, f32, "V9">;
+defm : SI_INDIRECT_Pattern <v10f32, f32, "V10">;
+defm : SI_INDIRECT_Pattern <v11f32, f32, "V11">;
+defm : SI_INDIRECT_Pattern <v12f32, f32, "V12">;
 defm : SI_INDIRECT_Pattern <v16f32, f32, "V16">;
 defm : SI_INDIRECT_Pattern <v32f32, f32, "V32">;
 
 defm : SI_INDIRECT_Pattern <v2i32, i32, "V2">;
 defm : SI_INDIRECT_Pattern <v4i32, i32, "V4">;
 defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;
+defm : SI_INDIRECT_Pattern <v9i32, i32, "V9">;
+defm : SI_INDIRECT_Pattern <v10i32, i32, "V10">;
+defm : SI_INDIRECT_Pattern <v11i32, i32, "V11">;
+defm : SI_INDIRECT_Pattern <v12i32, i32, "V12">;
 defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;
 defm : SI_INDIRECT_Pattern <v32i32, i32, "V32">;
 
@ -2449,6 +2449,14 @@ getAnyVGPRClassForBitWidth(unsigned BitWidth) {
|
|||
return &AMDGPU::VReg_224RegClass;
|
||||
if (BitWidth <= 256)
|
||||
return &AMDGPU::VReg_256RegClass;
|
||||
if (BitWidth <= 288)
|
||||
return &AMDGPU::VReg_288RegClass;
|
||||
if (BitWidth <= 320)
|
||||
return &AMDGPU::VReg_320RegClass;
|
||||
if (BitWidth <= 352)
|
||||
return &AMDGPU::VReg_352RegClass;
|
||||
if (BitWidth <= 384)
|
||||
return &AMDGPU::VReg_384RegClass;
|
||||
if (BitWidth <= 512)
|
||||
return &AMDGPU::VReg_512RegClass;
|
||||
if (BitWidth <= 1024)
|
||||
|
@ -2473,6 +2481,14 @@ getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
|
|||
return &AMDGPU::VReg_224_Align2RegClass;
|
||||
if (BitWidth <= 256)
|
||||
return &AMDGPU::VReg_256_Align2RegClass;
|
||||
if (BitWidth <= 288)
|
||||
return &AMDGPU::VReg_288_Align2RegClass;
|
||||
if (BitWidth <= 320)
|
||||
return &AMDGPU::VReg_320_Align2RegClass;
|
||||
if (BitWidth <= 352)
|
||||
return &AMDGPU::VReg_352_Align2RegClass;
|
||||
if (BitWidth <= 384)
|
||||
return &AMDGPU::VReg_384_Align2RegClass;
|
||||
if (BitWidth <= 512)
|
||||
return &AMDGPU::VReg_512_Align2RegClass;
|
||||
if (BitWidth <= 1024)
|
||||
|
@ -2509,6 +2525,14 @@ getAnyAGPRClassForBitWidth(unsigned BitWidth) {
|
|||
return &AMDGPU::AReg_224RegClass;
|
||||
if (BitWidth <= 256)
|
||||
return &AMDGPU::AReg_256RegClass;
|
||||
if (BitWidth <= 288)
|
||||
return &AMDGPU::AReg_288RegClass;
|
||||
if (BitWidth <= 320)
|
||||
return &AMDGPU::AReg_320RegClass;
|
||||
if (BitWidth <= 352)
|
||||
return &AMDGPU::AReg_352RegClass;
|
||||
if (BitWidth <= 384)
|
||||
return &AMDGPU::AReg_384RegClass;
|
||||
if (BitWidth <= 512)
|
||||
return &AMDGPU::AReg_512RegClass;
|
||||
if (BitWidth <= 1024)
|
||||
|
@ -2533,6 +2557,14 @@ getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
|
|||
return &AMDGPU::AReg_224_Align2RegClass;
|
||||
if (BitWidth <= 256)
|
||||
return &AMDGPU::AReg_256_Align2RegClass;
|
||||
if (BitWidth <= 288)
|
||||
return &AMDGPU::AReg_288_Align2RegClass;
|
||||
if (BitWidth <= 320)
|
||||
return &AMDGPU::AReg_320_Align2RegClass;
|
||||
if (BitWidth <= 352)
|
||||
return &AMDGPU::AReg_352_Align2RegClass;
|
||||
if (BitWidth <= 384)
|
||||
return &AMDGPU::AReg_384_Align2RegClass;
|
||||
if (BitWidth <= 512)
|
||||
return &AMDGPU::AReg_512_Align2RegClass;
|
||||
if (BitWidth <= 1024)
|
||||
|
@ -2567,6 +2599,14 @@ getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
|
|||
return &AMDGPU::AV_224RegClass;
|
||||
if (BitWidth <= 256)
|
||||
return &AMDGPU::AV_256RegClass;
|
||||
if (BitWidth <= 288)
|
||||
return &AMDGPU::AV_288RegClass;
|
||||
if (BitWidth <= 320)
|
||||
return &AMDGPU::AV_320RegClass;
|
||||
if (BitWidth <= 352)
|
||||
return &AMDGPU::AV_352RegClass;
|
||||
if (BitWidth <= 384)
|
||||
return &AMDGPU::AV_384RegClass;
|
||||
if (BitWidth <= 512)
|
||||
return &AMDGPU::AV_512RegClass;
|
||||
if (BitWidth <= 1024)
|
||||
|
@ -2591,6 +2631,14 @@ getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
|
|||
return &AMDGPU::AV_224_Align2RegClass;
|
||||
if (BitWidth <= 256)
|
||||
return &AMDGPU::AV_256_Align2RegClass;
|
||||
if (BitWidth <= 288)
|
||||
return &AMDGPU::AV_288_Align2RegClass;
|
||||
if (BitWidth <= 320)
|
||||
return &AMDGPU::AV_320_Align2RegClass;
|
||||
if (BitWidth <= 352)
|
||||
return &AMDGPU::AV_352_Align2RegClass;
|
||||
if (BitWidth <= 384)
|
||||
return &AMDGPU::AV_384_Align2RegClass;
|
||||
if (BitWidth <= 512)
|
||||
return &AMDGPU::AV_512_Align2RegClass;
|
||||
if (BitWidth <= 1024)
|
||||
|
@@ -2630,6 +2678,14 @@ SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
    return &AMDGPU::SGPR_224RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::SGPR_256RegClass;
  if (BitWidth <= 288)
    return &AMDGPU::SGPR_288RegClass;
  if (BitWidth <= 320)
    return &AMDGPU::SGPR_320RegClass;
  if (BitWidth <= 352)
    return &AMDGPU::SGPR_352RegClass;
  if (BitWidth <= 384)
    return &AMDGPU::SGPR_384RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::SGPR_512RegClass;
  if (BitWidth <= 1024)
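Each of the helpers above follows the same pattern: scan the supported widths in ascending order and return the first register class wide enough, so a value always lands in the smallest class that covers it. A minimal standalone sketch of that lookup, assuming only what the ladders above show (the Entry table and pickClass are illustrative names, not LLVM API):

#include <cstdio>

struct Entry { unsigned Width; const char *Name; };

// Widths sorted ascending; the first entry that covers BitWidth wins.
static const Entry VGPRClasses[] = {
    {256, "VReg_256"}, {288, "VReg_288"}, {320, "VReg_320"},
    {352, "VReg_352"}, {384, "VReg_384"}, {512, "VReg_512"},
    {1024, "VReg_1024"}};

static const char *pickClass(unsigned BitWidth) {
  for (const Entry &E : VGPRClasses)
    if (BitWidth <= E.Width)
      return E.Name;
  return nullptr; // wider than the largest class
}

int main() {
  // A <9 x i32> value needs 288 bits: with the new entries it lands in
  // VReg_288 instead of rounding all the way up to VReg_512.
  std::printf("%s\n", pickClass(9 * 32)); // prints "VReg_288"
}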
@@ -2686,6 +2742,26 @@ SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
    &AMDGPU::SReg_256RegClass,
    &AMDGPU::AReg_256_Align2RegClass,
    &AMDGPU::AReg_256RegClass,
    &AMDGPU::VReg_288_Align2RegClass,
    &AMDGPU::VReg_288RegClass,
    &AMDGPU::SReg_288RegClass,
    &AMDGPU::AReg_288_Align2RegClass,
    &AMDGPU::AReg_288RegClass,
    &AMDGPU::VReg_320_Align2RegClass,
    &AMDGPU::VReg_320RegClass,
    &AMDGPU::SReg_320RegClass,
    &AMDGPU::AReg_320_Align2RegClass,
    &AMDGPU::AReg_320RegClass,
    &AMDGPU::VReg_352_Align2RegClass,
    &AMDGPU::VReg_352RegClass,
    &AMDGPU::SReg_352RegClass,
    &AMDGPU::AReg_352_Align2RegClass,
    &AMDGPU::AReg_352RegClass,
    &AMDGPU::VReg_384_Align2RegClass,
    &AMDGPU::VReg_384RegClass,
    &AMDGPU::SReg_384RegClass,
    &AMDGPU::AReg_384_Align2RegClass,
    &AMDGPU::AReg_384RegClass,
    &AMDGPU::VReg_512_Align2RegClass,
    &AMDGPU::VReg_512RegClass,
    &AMDGPU::SReg_512RegClass,
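The new entries are spliced into this candidate table in ascending width order (288 before 320 before 352 and so on), which appears to be the invariant the surrounding search relies on: the first class whose register set contains the physical register is also the narrowest one. A toy model of that first-hit search, under that assumption (MiniClass and findClass are hypothetical names):

#include <vector>

struct MiniClass {
  unsigned Width;
  std::vector<unsigned> Regs; // toy physical-register ids
};

// Candidates must be ordered narrowest-first, as in the table above.
static const MiniClass *findClass(const std::vector<MiniClass> &Candidates,
                                  unsigned Reg) {
  for (const MiniClass &C : Candidates)
    for (unsigned R : C.Regs)
      if (R == Reg)
        return &C; // first, i.e. narrowest, match wins
  return nullptr;
}

int main() {
  std::vector<MiniClass> Candidates = {{288, {7}}, {320, {7, 8}}};
  // Register 7 is in both classes; the 288-bit one is found first.
  const MiniClass *C = findClass(Candidates, 7);
  return (C && C->Width == 288) ? 0 : 1;
}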
@@ -60,6 +60,16 @@ class getSubRegs<int size> {
  list<SubRegIndex> ret6 = [sub0, sub1, sub2, sub3, sub4, sub5];
  list<SubRegIndex> ret7 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6];
  list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
  list<SubRegIndex> ret9 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, sub8];
  list<SubRegIndex> ret10 = [sub0, sub1, sub2, sub3,
                             sub4, sub5, sub6, sub7,
                             sub8, sub9];
  list<SubRegIndex> ret11 = [sub0, sub1, sub2, sub3,
                             sub4, sub5, sub6, sub7,
                             sub8, sub9, sub10];
  list<SubRegIndex> ret12 = [sub0, sub1, sub2, sub3,
                             sub4, sub5, sub6, sub7,
                             sub8, sub9, sub10, sub11];
  list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
                             sub4, sub5, sub6, sub7,
                             sub8, sub9, sub10, sub11,
@@ -80,8 +90,12 @@ class getSubRegs<int size> {
              !if(!eq(size, 6), ret6,
              !if(!eq(size, 7), ret7,
              !if(!eq(size, 8), ret8,
              !if(!eq(size, 16), ret16,
              ret32))))))));
              !if(!eq(size, 9), ret9,
              !if(!eq(size, 10), ret10,
              !if(!eq(size, 11), ret11,
              !if(!eq(size, 12), ret12,
              !if(!eq(size, 16), ret16,
              ret32))))))))))));
}

// Generates list of sequential register tuple names.
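getSubRegs<N>.ret is just the list of the N 32-bit sub-register indices sub0 through subN-1; the new ret9/ret10/ret11/ret12 lists extend that pattern, and the !if chain above dispatches on N. The same construction in plain C++ (getSubRegNames is a hypothetical stand-in for the TableGen class):

#include <string>
#include <vector>

// Builds {"sub0", ..., "sub<N-1>"}, mirroring getSubRegs<N>.ret.
static std::vector<std::string> getSubRegNames(unsigned N) {
  std::vector<std::string> Ret;
  for (unsigned I = 0; I != N; ++I)
    Ret.push_back("sub" + std::to_string(I));
  return Ret;
}

int main() {
  return getSubRegNames(9).size() == 9 ? 0 : 1; // ret9 covers sub0..sub8
}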
@@ -423,6 +437,18 @@ def SGPR_224Regs : SIRegisterTuples<getSubRegs<7>.ret, SGPR_32, 105, 4, 7, "s">;
// SGPR 256-bit registers
def SGPR_256Regs : SIRegisterTuples<getSubRegs<8>.ret, SGPR_32, 105, 4, 8, "s">;

// SGPR 288-bit registers. No operations use these, but for symmetry with 288-bit VGPRs.
def SGPR_288Regs : SIRegisterTuples<getSubRegs<9>.ret, SGPR_32, 105, 4, 9, "s">;

// SGPR 320-bit registers. No operations use these, but for symmetry with 320-bit VGPRs.
def SGPR_320Regs : SIRegisterTuples<getSubRegs<10>.ret, SGPR_32, 105, 4, 10, "s">;

// SGPR 352-bit registers. No operations use these, but for symmetry with 352-bit VGPRs.
def SGPR_352Regs : SIRegisterTuples<getSubRegs<11>.ret, SGPR_32, 105, 4, 11, "s">;

// SGPR 384-bit registers. No operations use these, but for symmetry with 384-bit VGPRs.
def SGPR_384Regs : SIRegisterTuples<getSubRegs<12>.ret, SGPR_32, 105, 4, 12, "s">;

// SGPR 512-bit registers
def SGPR_512Regs : SIRegisterTuples<getSubRegs<16>.ret, SGPR_32, 105, 4, 16, "s">;
@@ -465,6 +491,18 @@ def TTMP_224Regs : SIRegisterTuples<getSubRegs<7>.ret, TTMP_32, 15, 4, 7, "ttmp"
// Trap handler TMP 256-bit registers
def TTMP_256Regs : SIRegisterTuples<getSubRegs<8>.ret, TTMP_32, 15, 4, 8, "ttmp">;

// Trap handler TMP 288-bit registers
def TTMP_288Regs : SIRegisterTuples<getSubRegs<9>.ret, TTMP_32, 15, 4, 9, "ttmp">;

// Trap handler TMP 320-bit registers
def TTMP_320Regs : SIRegisterTuples<getSubRegs<10>.ret, TTMP_32, 15, 4, 10, "ttmp">;

// Trap handler TMP 352-bit registers
def TTMP_352Regs : SIRegisterTuples<getSubRegs<11>.ret, TTMP_32, 15, 4, 11, "ttmp">;

// Trap handler TMP 384-bit registers
def TTMP_384Regs : SIRegisterTuples<getSubRegs<12>.ret, TTMP_32, 15, 4, 12, "ttmp">;

// Trap handler TMP 512-bit registers
def TTMP_512Regs : SIRegisterTuples<getSubRegs<16>.ret, TTMP_32, 15, 4, 16, "ttmp">;
@@ -609,6 +647,18 @@ def VGPR_224 : SIRegisterTuples<getSubRegs<7>.ret, VGPR_32, 255, 1, 7, "v">;
// VGPR 256-bit registers
def VGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, VGPR_32, 255, 1, 8, "v">;

// VGPR 288-bit registers
def VGPR_288 : SIRegisterTuples<getSubRegs<9>.ret, VGPR_32, 255, 1, 9, "v">;

// VGPR 320-bit registers
def VGPR_320 : SIRegisterTuples<getSubRegs<10>.ret, VGPR_32, 255, 1, 10, "v">;

// VGPR 352-bit registers
def VGPR_352 : SIRegisterTuples<getSubRegs<11>.ret, VGPR_32, 255, 1, 11, "v">;

// VGPR 384-bit registers
def VGPR_384 : SIRegisterTuples<getSubRegs<12>.ret, VGPR_32, 255, 1, 12, "v">;

// VGPR 512-bit registers
def VGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, VGPR_32, 255, 1, 16, "v">;
@@ -653,6 +703,18 @@ def AGPR_224 : SIRegisterTuples<getSubRegs<7>.ret, AGPR_32, 255, 1, 7, "a">;
// AGPR 256-bit registers
def AGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, AGPR_32, 255, 1, 8, "a">;

// AGPR 288-bit registers
def AGPR_288 : SIRegisterTuples<getSubRegs<9>.ret, AGPR_32, 255, 1, 9, "a">;

// AGPR 320-bit registers
def AGPR_320 : SIRegisterTuples<getSubRegs<10>.ret, AGPR_32, 255, 1, 10, "a">;

// AGPR 352-bit registers
def AGPR_352 : SIRegisterTuples<getSubRegs<11>.ret, AGPR_32, 255, 1, 11, "a">;

// AGPR 384-bit registers
def AGPR_384 : SIRegisterTuples<getSubRegs<12>.ret, AGPR_32, 255, 1, 12, "a">;

// AGPR 512-bit registers
def AGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, AGPR_32, 255, 1, 16, "a">;
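Reading the SIRegisterTuples arguments positionally across these defs, they appear to be: the sub-register index list, the 32-bit base class, the highest base-register index (105 for SGPRs, 15 for TTMPs, 255 for VGPRs/AGPRs), the stride between tuple start registers (4 for SGPR/TTMP, 1 for VGPR/AGPR), the tuple size in dwords, and the assembly name prefix. Under that reading, a sketch of how the new 9-dword SGPR tuples would be enumerated:

#include <cstdio>

int main() {
  // Assumed meaning of "105, 4, 9": last base index, start stride, size.
  const unsigned Last = 105, Stride = 4, Size = 9;
  for (unsigned Start = 0; Start + Size <= Last + 1; Start += Stride)
    std::printf("s[%u:%u]\n", Start, Start + Size - 1); // s[0:8], s[4:12], ...
}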
@@ -829,6 +891,10 @@ defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], SGPR_256Regs, TTMP_256Regs>;
defm "" : SRegClass<9, [v9i32, v9f32], SGPR_288Regs, TTMP_288Regs>;
defm "" : SRegClass<10, [v10i32, v10f32], SGPR_320Regs, TTMP_320Regs>;
defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
defm "" : SRegClass<12, [v12i32, v12f32], SGPR_384Regs, TTMP_384Regs>;

let GlobalPriority = true in {
defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
@@ -873,6 +939,10 @@ defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;
defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>;
defm VReg_224 : VRegClass<7, [v7i32, v7f32], (add VGPR_224)>;
defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], (add VGPR_256)>;
defm VReg_288 : VRegClass<9, [v9i32, v9f32], (add VGPR_288)>;
defm VReg_320 : VRegClass<10, [v10i32, v10f32], (add VGPR_320)>;
defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>;
defm VReg_384 : VRegClass<12, [v12i32, v12f32], (add VGPR_384)>;

let GlobalPriority = true in {
defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;
@@ -897,6 +967,10 @@ defm AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;
defm AReg_192 : ARegClass<6, [v6i32, v6f32, v3i64, v3f64], (add AGPR_192)>;
defm AReg_224 : ARegClass<7, [v7i32, v7f32], (add AGPR_224)>;
defm AReg_256 : ARegClass<8, [v8i32, v8f32, v4i64, v4f64], (add AGPR_256)>;
defm AReg_288 : ARegClass<9, [v9i32, v9f32], (add AGPR_288)>;
defm AReg_320 : ARegClass<10, [v10i32, v10f32], (add AGPR_320)>;
defm AReg_352 : ARegClass<11, [v11i32, v11f32], (add AGPR_352)>;
defm AReg_384 : ARegClass<12, [v12i32, v12f32], (add AGPR_384)>;

let GlobalPriority = true in {
defm AReg_512 : ARegClass<16, [v16i32, v16f32, v8i64, v8f64], (add AGPR_512)>;
@@ -963,6 +1037,10 @@ defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>;
defm AV_192 : AVRegClass<6, VReg_192.RegTypes, (add VGPR_192), (add AGPR_192)>;
defm AV_224 : AVRegClass<7, VReg_224.RegTypes, (add VGPR_224), (add AGPR_224)>;
defm AV_256 : AVRegClass<8, VReg_256.RegTypes, (add VGPR_256), (add AGPR_256)>;
defm AV_288 : AVRegClass<9, VReg_288.RegTypes, (add VGPR_288), (add AGPR_288)>;
defm AV_320 : AVRegClass<10, VReg_320.RegTypes, (add VGPR_320), (add AGPR_320)>;
defm AV_352 : AVRegClass<11, VReg_352.RegTypes, (add VGPR_352), (add AGPR_352)>;
defm AV_384 : AVRegClass<12, VReg_384.RegTypes, (add VGPR_384), (add AGPR_384)>;

let GlobalPriority = true in {
defm AV_512 : AVRegClass<16, VReg_512.RegTypes, (add VGPR_512), (add AGPR_512)>;
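The value-type lists attached to each new register class are exactly the types that fill its width, nine to twelve 32-bit elements. A compile-time spot check of that arithmetic (illustrative only):

// Element count times 32 bits must equal the class width.
static_assert(9 * 32 == 288, "v9i32/v9f32 fill the 288-bit classes");
static_assert(10 * 32 == 320, "v10i32/v10f32 fill the 320-bit classes");
static_assert(11 * 32 == 352, "v11i32/v11f32 fill the 352-bit classes");
static_assert(12 * 32 == 384, "v12i32/v12f32 fill the 384-bit classes");
int main() {} // nothing to run; the checks are compile-time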
@@ -292,6 +292,14 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
    RC = &AMDGPU::VReg_224RegClass;
  } else if (Info->VAddrDwords == 8) {
    RC = &AMDGPU::VReg_256RegClass;
  } else if (Info->VAddrDwords == 9) {
    RC = &AMDGPU::VReg_288RegClass;
  } else if (Info->VAddrDwords == 10) {
    RC = &AMDGPU::VReg_320RegClass;
  } else if (Info->VAddrDwords == 11) {
    RC = &AMDGPU::VReg_352RegClass;
  } else if (Info->VAddrDwords == 12) {
    RC = &AMDGPU::VReg_384RegClass;
  } else {
    RC = &AMDGPU::VReg_512RegClass;
    NewAddrDwords = 16;
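With the new classes available, shrinkMIMG can round a 9- to 12-dword VAddr up to an exact-size register class instead of jumping straight to VReg_512. For the counts visible in this hunk the ladder reduces to a one-line rule; a standalone restatement under that assumption (roundUpVAddrDwords is a hypothetical helper, not the LLVM function):

#include <cstdio>

// Exact-size classes cover the 7-12 dword cases in the hunk above;
// anything larger still falls back to the 16-dword VReg_512 else branch.
static unsigned roundUpVAddrDwords(unsigned Dwords) {
  return Dwords <= 12 ? Dwords : 16;
}

int main() {
  std::printf("%u\n", roundUpVAddrDwords(11)); // 11 -> VReg_352
  std::printf("%u\n", roundUpVAddrDwords(13)); // 13 -> VReg_512, 16 dwords
}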
@@ -2203,6 +2203,42 @@ unsigned getRegBitWidth(unsigned RCID) {
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::SReg_288RegClassID:
  case AMDGPU::VReg_288RegClassID:
  case AMDGPU::AReg_288RegClassID:
  case AMDGPU::VReg_288_Align2RegClassID:
  case AMDGPU::AReg_288_Align2RegClassID:
  case AMDGPU::AV_288RegClassID:
  case AMDGPU::AV_288_Align2RegClassID:
    return 288;
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::SReg_320RegClassID:
  case AMDGPU::VReg_320RegClassID:
  case AMDGPU::AReg_320RegClassID:
  case AMDGPU::VReg_320_Align2RegClassID:
  case AMDGPU::AReg_320_Align2RegClassID:
  case AMDGPU::AV_320RegClassID:
  case AMDGPU::AV_320_Align2RegClassID:
    return 320;
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::SReg_352RegClassID:
  case AMDGPU::VReg_352RegClassID:
  case AMDGPU::AReg_352RegClassID:
  case AMDGPU::VReg_352_Align2RegClassID:
  case AMDGPU::AReg_352_Align2RegClassID:
  case AMDGPU::AV_352RegClassID:
  case AMDGPU::AV_352_Align2RegClassID:
    return 352;
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::SReg_384RegClassID:
  case AMDGPU::VReg_384RegClassID:
  case AMDGPU::AReg_384RegClassID:
  case AMDGPU::VReg_384_Align2RegClassID:
  case AMDGPU::AReg_384_Align2RegClassID:
  case AMDGPU::AV_384RegClassID:
  case AMDGPU::AV_384_Align2RegClassID:
    return 384;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
@@ -15,7 +15,7 @@ define amdgpu_kernel void @add_i32() #0 {
; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9i32 = add <9 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'add_i32'
@@ -27,7 +27,7 @@ define amdgpu_kernel void @add_i32() #0 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9i32 = add <9 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%i32 = add i32 undef, undef
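The cost change for the <9 x i32> case is consistent with the operand now being legalized in nine 32-bit lanes rather than widened to sixteen: the old and new estimates here, and in the integer saturating-arithmetic hunks below, differ by exactly a factor of 9/16. A quick check of that reading (an inference from the printed numbers, not a statement about the cost model's internals):

// Scaling the old estimate by 9/16 reproduces the new one.
static_assert(48 * 9 / 16 == 27, "add <9 x i32>, throughput model");
static_assert(96 * 9 / 16 == 54, "sat add/sub <9 x i32>, size model");
static_assert(192 * 9 / 16 == 108, "sat add/sub <9 x i32>, throughput model");
int main() {}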
@@ -50,7 +50,7 @@ define i32 @add(i32 %arg) {
; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -79,7 +79,7 @@ define i32 @add(i32 %arg) {
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -108,7 +108,7 @@ define i32 @add(i32 %arg) {
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -137,7 +137,7 @@ define i32 @add(i32 %arg) {
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -230,7 +230,7 @@ define i32 @sub(i32 %arg) {
; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -259,7 +259,7 @@ define i32 @sub(i32 %arg) {
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -288,7 +288,7 @@ define i32 @sub(i32 %arg) {
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -317,7 +317,7 @@ define i32 @sub(i32 %arg) {
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
@@ -50,7 +50,7 @@ define i32 @add(i32 %arg) {
; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -79,7 +79,7 @@ define i32 @add(i32 %arg) {
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -108,7 +108,7 @@ define i32 @add(i32 %arg) {
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -137,7 +137,7 @@ define i32 @add(i32 %arg) {
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.uadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -230,7 +230,7 @@ define i32 @sub(i32 %arg) {
; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -259,7 +259,7 @@ define i32 @sub(i32 %arg) {
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -288,7 +288,7 @@ define i32 @sub(i32 %arg) {
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)

@@ -317,7 +317,7 @@ define i32 @sub(i32 %arg) {
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V9I32 = call <9 x i32> @llvm.usub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
@@ -15,7 +15,7 @@ define amdgpu_kernel void @fadd_f32() #0 {
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fadd <8 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fadd <9 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fadd <9 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; NOPACKEDF32-LABEL: 'fadd_f32'

@@ -25,7 +25,7 @@ define amdgpu_kernel void @fadd_f32() #0 {
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fadd <8 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fadd <9 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fadd <9 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f32'

@@ -35,7 +35,7 @@ define amdgpu_kernel void @fadd_f32() #0 {
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fadd <8 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fadd <9 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fadd <9 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; NOPACKEDF32-SIZE-LABEL: 'fadd_f32'

@@ -45,7 +45,7 @@ define amdgpu_kernel void @fadd_f32() #0 {
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fadd <8 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fadd <9 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fadd <9 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = fadd float undef, undef
@@ -20,7 +20,7 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 {
; ALL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v8f32 = fdiv <8 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v9f32 = fdiv <9 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 378 for instruction: %v9f32 = fdiv <9 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'fdiv_f32_ieee'

@@ -30,7 +30,7 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8f32 = fdiv <8 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v9f32 = fdiv <9 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %v9f32 = fdiv <9 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = fdiv float undef, undef

@@ -51,7 +51,7 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
; ALL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = fdiv <4 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v5f32 = fdiv <5 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8f32 = fdiv <8 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v9f32 = fdiv <9 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 432 for instruction: %v9f32 = fdiv <9 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'fdiv_f32_ftzdaz'

@@ -61,7 +61,7 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v8f32 = fdiv <8 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v9f32 = fdiv <9 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 378 for instruction: %v9f32 = fdiv <9 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = fdiv float undef, undef
@@ -17,7 +17,7 @@ define amdgpu_kernel void @fma_f32() #0 {
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FASTF64-LABEL: 'fma_f32'

@@ -27,7 +27,7 @@ define amdgpu_kernel void @fma_f32() #0 {
; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
; FASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
; FASTF64-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOW-LABEL: 'fma_f32'

@@ -37,7 +37,7 @@ define amdgpu_kernel void @fma_f32() #0 {
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
; SLOW-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
; SLOW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOWF64-SIZE-LABEL: 'fma_f32'

@@ -47,7 +47,7 @@ define amdgpu_kernel void @fma_f32() #0 {
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; FASTF64-SIZE-LABEL: 'fma_f32'

@@ -57,7 +57,7 @@ define amdgpu_kernel void @fma_f32() #0 {
; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
; FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLOW-SIZE-LABEL: 'fma_f32'

@@ -67,7 +67,7 @@ define amdgpu_kernel void @fma_f32() #0 {
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #1
@@ -15,7 +15,7 @@ define amdgpu_kernel void @fmul_f32() #0 {
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fmul <9 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fmul <9 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; F32-LABEL: 'fmul_f32'

@@ -25,7 +25,7 @@ define amdgpu_kernel void @fmul_f32() #0 {
; F32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fmul <9 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fmul <9 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX90A-SIZE-LABEL: 'fmul_f32'

@@ -35,7 +35,7 @@ define amdgpu_kernel void @fmul_f32() #0 {
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fmul <9 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fmul <9 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fmul_f32'

@@ -45,7 +45,7 @@ define amdgpu_kernel void @fmul_f32() #0 {
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fmul <9 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fmul <9 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = fmul float undef, undef
@@ -15,7 +15,7 @@ define amdgpu_kernel void @fsub_f32() #0 {
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fsub <9 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; NOPACKEDF32-LABEL: 'fsub_f32'

@@ -25,7 +25,7 @@ define amdgpu_kernel void @fsub_f32() #0 {
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fsub <9 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f32'

@@ -35,7 +35,7 @@ define amdgpu_kernel void @fsub_f32() #0 {
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fsub <9 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; NOPACKEDF32-SIZE-LABEL: 'fsub_f32'

@@ -45,7 +45,7 @@ define amdgpu_kernel void @fsub_f32() #0 {
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fsub <9 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = fsub float undef, undef
@@ -13,7 +13,7 @@ define amdgpu_kernel void @mul_i32() #0 {
; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i32 = mul <4 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i32 = mul <5 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i32 = mul <8 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v9i32 = mul <9 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v9i32 = mul <9 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'mul_i32'

@@ -23,7 +23,7 @@ define amdgpu_kernel void @mul_i32() #0 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = mul <4 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i32 = mul <5 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = mul <8 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v9i32 = mul <9 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v9i32 = mul <9 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%i32 = mul i32 undef, undef
File diff suppressed because it is too large
@@ -688,7 +688,7 @@ body: |
    ; GCN-NEXT: {{  $}}
    ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF
    ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF
    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[DEF]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[DEF1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11
    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_384 = REG_SEQUENCE [[DEF]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[DEF1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11
    ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
    %0:sgpr(<3 x s64>) = G_IMPLICIT_DEF
    %1:sgpr(<3 x s64>) = G_IMPLICIT_DEF
@@ -296,7 +296,7 @@ body: |
; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr3_sgpr4_sgpr5
; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY $sgpr9_sgpr10_sgpr11
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3_sub4_sub5, [[COPY2]], %subreg.sub6_sub7_sub8, [[COPY3]], %subreg.sub9_sub10_sub11
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_384_with_sub0_sub1_sub2 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3_sub4_sub5, [[COPY2]], %subreg.sub6_sub7_sub8, [[COPY3]], %subreg.sub9_sub10_sub11
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8
@@ -332,7 +332,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_192 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_192 = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[COPY1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_384 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[COPY1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11
; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2
; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5
; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8

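Note on the hunks above: two 192-bit values (or four 96-bit values) now concatenate into the new 384-bit classes (sgpr_384, vreg_384) instead of a padded 512-bit tuple. A plain-IR analogue (a sketch only; hypothetical function name):

define <6 x i64> @concat_v3i64(<3 x i64> %a, <3 x i64> %b) {
  ; two 192-bit halves -> one 384-bit vector
  %c = shufflevector <3 x i64> %a, <3 x i64> %b,
                     <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
  ret <6 x i64> %c
}
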
@@ -18,7 +18,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> inreg %tdescr) {
; GCN-LABEL: image_bvh_intersect_ray:
; GCN: ; %bb.0:
; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]
; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[0:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog
; ERR: in function image_bvh_intersect_ray{{.*}}intrinsic not supported on subtarget
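Note: image_bvh_intersect_ray carries 1 (node_ptr) + 1 (ray_extent) + 3 + 3 + 3 = 11 dwords of ray data, so the vaddr tuple shrinks from a padded v[0:15] to an exact v[0:10] now that a 352-bit register class exists. A sketch of the underlying call (mirroring the i64 variant shown later in this file; the i32 intrinsic name is assumed by analogy):

%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)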
@@ -30,7 +30,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_
define amdgpu_ps <4 x float> @image_bvh_intersect_ray_flat(i32 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
; GCN-LABEL: image_bvh_intersect_ray_flat:
; GCN: ; %bb.0:
; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]
; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[0:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog
%ray_origin0 = insertelement <3 x float> undef, float %ray_origin_x, i32 0
@@ -78,7 +78,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16(i32 %node_ptr, float %
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> inreg %tdescr) {
; GCN-LABEL: image_bvh64_intersect_ray:
; GCN: ; %bb.0:
; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]
; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)
@@ -89,7 +89,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(i64 %node_ptr, float %ra
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_flat(<2 x i32> %node_ptr_vec, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
; GCN-LABEL: image_bvh64_intersect_ray_flat:
; GCN: ; %bb.0:
; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]
; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog
%node_ptr = bitcast <2 x i32> %node_ptr_vec to i64
@@ -118,7 +118,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 %node_ptr, float
; GFX10-NEXT: v_alignbit_b32 v8, v9, v8, 16
; GFX10-NEXT: v_and_or_b32 v6, v6, 0xffff, v10
; GFX10-NEXT: v_and_or_b32 v7, v7, 0xffff, v11
; GFX10-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3] a16
; GFX10-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[0:3] a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
@@ -159,7 +159,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[15:30], s[4:7]
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[15:25], s[4:7]
; GFX1030-NEXT: ; implicit-def: $vgpr11
; GFX1030-NEXT: ; implicit-def: $vgpr15
; GFX1030-NEXT: ; implicit-def: $vgpr16
@@ -182,34 +182,30 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
;
; GFX1013-LABEL: image_bvh_intersect_ray_vgpr_descr:
; GFX1013: ; %bb.0:
; GFX1013-NEXT: v_mov_b32_e32 v16, v11
; GFX1013-NEXT: v_mov_b32_e32 v17, v12
; GFX1013-NEXT: v_mov_b32_e32 v18, v13
; GFX1013-NEXT: v_mov_b32_e32 v19, v14
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
; GFX1013-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
; GFX1013-NEXT: v_readfirstlane_b32 s4, v16
; GFX1013-NEXT: v_readfirstlane_b32 s5, v17
; GFX1013-NEXT: v_readfirstlane_b32 s6, v18
; GFX1013-NEXT: v_readfirstlane_b32 s7, v19
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19]
; GFX1013-NEXT: v_readfirstlane_b32 s4, v11
; GFX1013-NEXT: v_readfirstlane_b32 s5, v12
; GFX1013-NEXT: v_readfirstlane_b32 s6, v13
; GFX1013-NEXT: v_readfirstlane_b32 s7, v14
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
; GFX1013-NEXT: image_bvh_intersect_ray v[20:23], v[0:15], s[4:7]
; GFX1013-NEXT: ; implicit-def: $vgpr16
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19
; GFX1013-NEXT: image_bvh_intersect_ray v[15:18], v[0:10], s[4:7]
; GFX1013-NEXT: ; implicit-def: $vgpr11
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10
; GFX1013-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1013-NEXT: s_cbranch_execnz .LBB6_1
; GFX1013-NEXT: ; %bb.2:
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: v_mov_b32_e32 v0, v20
; GFX1013-NEXT: v_mov_b32_e32 v1, v21
; GFX1013-NEXT: v_mov_b32_e32 v2, v22
; GFX1013-NEXT: v_mov_b32_e32 v3, v23
; GFX1013-NEXT: v_mov_b32_e32 v0, v15
; GFX1013-NEXT: v_mov_b32_e32 v1, v16
; GFX1013-NEXT: v_mov_b32_e32 v2, v17
; GFX1013-NEXT: v_mov_b32_e32 v3, v18
; GFX1013-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: image_bvh_intersect_ray_vgpr_descr:
@@ -391,7 +387,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]
; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[16:31], s[4:7]
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[16:27], s[4:7]
; GFX1030-NEXT: ; implicit-def: $vgpr12
; GFX1030-NEXT: ; implicit-def: $vgpr16
; GFX1030-NEXT: ; implicit-def: $vgpr17
@@ -415,34 +411,30 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
;
; GFX1013-LABEL: image_bvh64_intersect_ray_vgpr_descr:
; GFX1013: ; %bb.0:
; GFX1013-NEXT: v_mov_b32_e32 v16, v12
; GFX1013-NEXT: v_mov_b32_e32 v17, v13
; GFX1013-NEXT: v_mov_b32_e32 v18, v14
; GFX1013-NEXT: v_mov_b32_e32 v19, v15
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
; GFX1013-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GFX1013-NEXT: v_readfirstlane_b32 s4, v16
; GFX1013-NEXT: v_readfirstlane_b32 s5, v17
; GFX1013-NEXT: v_readfirstlane_b32 s6, v18
; GFX1013-NEXT: v_readfirstlane_b32 s7, v19
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19]
; GFX1013-NEXT: v_readfirstlane_b32 s4, v12
; GFX1013-NEXT: v_readfirstlane_b32 s5, v13
; GFX1013-NEXT: v_readfirstlane_b32 s6, v14
; GFX1013-NEXT: v_readfirstlane_b32 s7, v15
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]
; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7]
; GFX1013-NEXT: ; implicit-def: $vgpr16
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19
; GFX1013-NEXT: image_bvh64_intersect_ray v[16:19], v[0:11], s[4:7]
; GFX1013-NEXT: ; implicit-def: $vgpr12
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; GFX1013-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1013-NEXT: s_cbranch_execnz .LBB8_1
; GFX1013-NEXT: ; %bb.2:
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: v_mov_b32_e32 v0, v20
; GFX1013-NEXT: v_mov_b32_e32 v1, v21
; GFX1013-NEXT: v_mov_b32_e32 v2, v22
; GFX1013-NEXT: v_mov_b32_e32 v3, v23
; GFX1013-NEXT: v_mov_b32_e32 v0, v16
; GFX1013-NEXT: v_mov_b32_e32 v1, v17
; GFX1013-NEXT: v_mov_b32_e32 v2, v18
; GFX1013-NEXT: v_mov_b32_e32 v3, v19
; GFX1013-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: image_bvh64_intersect_ray_vgpr_descr:
@@ -508,7 +500,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[14:29], s[4:7] a16
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[14:22], s[4:7] a16
; GFX1030-NEXT: ; implicit-def: $vgpr10
; GFX1030-NEXT: ; implicit-def: $vgpr14
; GFX1030-NEXT: ; implicit-def: $vgpr15
@@ -529,42 +521,38 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
;
; GFX1013-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
; GFX1013: ; %bb.0:
; GFX1013-NEXT: v_mov_b32_e32 v16, v10
; GFX1013-NEXT: v_mov_b32_e32 v17, v11
; GFX1013-NEXT: v_lshrrev_b32_e32 v10, 16, v6
; GFX1013-NEXT: v_and_b32_e32 v11, 0xffff, v8
; GFX1013-NEXT: v_lshrrev_b32_e32 v14, 16, v6
; GFX1013-NEXT: v_and_b32_e32 v15, 0xffff, v8
; GFX1013-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX1013-NEXT: v_mov_b32_e32 v18, v12
; GFX1013-NEXT: v_mov_b32_e32 v19, v13
; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10
; GFX1013-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; GFX1013-NEXT: v_alignbit_b32 v8, v9, v8, 16
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
; GFX1013-NEXT: v_and_or_b32 v6, v6, 0xffff, v10
; GFX1013-NEXT: v_and_or_b32 v7, v7, 0xffff, v11
; GFX1013-NEXT: v_lshlrev_b32_e32 v14, 16, v14
; GFX1013-NEXT: v_lshlrev_b32_e32 v15, 16, v15
; GFX1013-NEXT: v_alignbit_b32 v8, v9, v8, 16
; GFX1013-NEXT: v_and_or_b32 v6, v6, 0xffff, v14
; GFX1013-NEXT: v_and_or_b32 v7, v7, 0xffff, v15
; GFX1013-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GFX1013-NEXT: v_readfirstlane_b32 s4, v16
; GFX1013-NEXT: v_readfirstlane_b32 s5, v17
; GFX1013-NEXT: v_readfirstlane_b32 s6, v18
; GFX1013-NEXT: v_readfirstlane_b32 s7, v19
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19]
; GFX1013-NEXT: v_readfirstlane_b32 s4, v10
; GFX1013-NEXT: v_readfirstlane_b32 s5, v11
; GFX1013-NEXT: v_readfirstlane_b32 s6, v12
; GFX1013-NEXT: v_readfirstlane_b32 s7, v13
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7] a16
; GFX1013-NEXT: ; implicit-def: $vgpr16
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19
; GFX1013-NEXT: image_bvh64_intersect_ray v[14:17], v[0:8], s[4:7] a16
; GFX1013-NEXT: ; implicit-def: $vgpr10
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
; GFX1013-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1013-NEXT: s_cbranch_execnz .LBB9_1
; GFX1013-NEXT: ; %bb.2:
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: v_mov_b32_e32 v0, v20
; GFX1013-NEXT: v_mov_b32_e32 v1, v21
; GFX1013-NEXT: v_mov_b32_e32 v2, v22
; GFX1013-NEXT: v_mov_b32_e32 v3, v23
; GFX1013-NEXT: v_mov_b32_e32 v0, v14
; GFX1013-NEXT: v_mov_b32_e32 v1, v15
; GFX1013-NEXT: v_mov_b32_e32 v2, v16
; GFX1013-NEXT: v_mov_b32_e32 v3, v17
; GFX1013-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
@@ -631,7 +619,7 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(ptr %p_node_ptr,
; GFX1030-NEXT: v_mov_b32_e32 v2, 0
; GFX1030-NEXT: v_mov_b32_e32 v3, 1.0
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[4:7]
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[4:7]
; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1030-NEXT: s_endpgm
@@ -661,7 +649,7 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(ptr %p_node_ptr,
; GFX1013-NEXT: v_mov_b32_e32 v4, 2.0
; GFX1013-NEXT: v_mov_b32_e32 v5, 0x40400000
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[4:7]
; GFX1013-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[4:7]
; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1013-NEXT: s_endpgm
@@ -885,7 +873,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4
; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c7
; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1030-NEXT: s_endpgm
@@ -914,7 +902,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4
; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c7
; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[4:7]
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[4:7]
; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1013-NEXT: s_endpgm
@@ -1012,7 +1000,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
; GFX1030-NEXT: v_mov_b32_e32 v7, s4
; GFX1030-NEXT: v_mov_b32_e32 v8, s6
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3] a16
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[0:3] a16
; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1030-NEXT: s_endpgm
@@ -1056,7 +1044,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
; GFX1013-NEXT: v_mov_b32_e32 v7, s0
; GFX1013-NEXT: v_mov_b32_e32 v8, s2
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[4:7] a16
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[4:7] a16
; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1013-NEXT: s_endpgm

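Note: in the a16 forms, ray_dir and ray_inv_dir are 16-bit and pack together into 3 dwords, so image_bvh64 a16 needs 2 (node_ptr) + 1 (extent) + 3 (origin) + 3 (packed dir/inv_dir) = 9 dwords, hence v[0:8] with the new 288-bit tuple, while the 32-bit bvh64 form needs 12 dwords (v[0:11], 384-bit). Sketch (intrinsic name assumed by analogy with the .v4f32 form used earlier; not taken from this commit):

%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr)
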
@@ -1,7 +1,7 @@
# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -o - %s | FileCheck %s
# Check that %11 and %20 have been coalesced.
# CHECK: IMAGE_SAMPLE_C_D_O_V1_V16 %[[REG:[0-9]+]]
# CHECK: IMAGE_SAMPLE_C_D_O_V1_V16 %[[REG]]
# CHECK: IMAGE_SAMPLE_C_D_O_V1_V11 %[[REG:[0-9]+]]
# CHECK: IMAGE_SAMPLE_C_D_O_V1_V11 %[[REG]]

---
name: main
@@ -17,9 +17,9 @@ registers:
- { id: 6, class: sgpr_128 }
- { id: 7, class: sgpr_512 }
- { id: 9, class: vreg_512 }
- { id: 11, class: vreg_512 }
- { id: 11, class: vreg_352 }
- { id: 18, class: vgpr_32 }
- { id: 20, class: vreg_512 }
- { id: 20, class: vreg_352 }
- { id: 27, class: vgpr_32 }
liveins:
- { reg: '$sgpr2_sgpr3', virtual-reg: '%0' }
@@ -61,7 +61,7 @@ body: |
%11.sub6 = COPY %1
%11.sub7 = COPY %1
%11.sub8 = COPY %1
dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32))
dead %18 = IMAGE_SAMPLE_C_D_O_V1_V11 %11, %3, %4, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32))
%20.sub1 = COPY %2
%20.sub2 = COPY %2
%20.sub3 = COPY %2
@@ -70,6 +70,6 @@ body: |
%20.sub6 = COPY %2
%20.sub7 = COPY %2
%20.sub8 = COPY %2
dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32))
dead %27 = IMAGE_SAMPLE_C_D_O_V1_V11 %20, %5, %6, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32))

...
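Note on this coalescing test: the V16 -> V11 opcode suffix and the vreg_512 -> vreg_352 class changes describe the same thing; the sample's 11-dword vaddr tuple now fits the new 352-bit class exactly instead of being padded to 512 bits, and the CHECK lines still verify that %11 and %20 coalesce into one register.
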
@@ -171,14 +171,20 @@ define amdgpu_kernel void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
; SI-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
; SI-NEXT: s_mov_b32 s26, -1
; SI-NEXT: s_mov_b32 s27, 0xe8f000
; SI-NEXT: s_add_u32 s24, s24, s3
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_addc_u32 s25, s25, 0
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
; SI-NEXT: s_mov_b32 s18, s10
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: s_mov_b32 s18, s10
; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_mov_b32 s22, s10
; SI-NEXT: s_mov_b32 s23, s11
@@ -197,24 +203,30 @@ define amdgpu_kernel void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x
;
; VI-LABEL: test_copy_v4i8_x4:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
; VI-NEXT: s_mov_b32 s90, -1
; VI-NEXT: s_mov_b32 s91, 0xe80000
; VI-NEXT: s_add_u32 s88, s88, s3
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x44
; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT: s_mov_b32 s11, 0xf000
; VI-NEXT: s_mov_b32 s10, -1
; VI-NEXT: s_mov_b32 s14, s10
; VI-NEXT: s_addc_u32 s89, s89, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dword v0, v[0:1]
; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s14, s10
; VI-NEXT: s_mov_b32 s15, s11
; VI-NEXT: s_mov_b32 s18, s10
; VI-NEXT: s_mov_b32 s19, s11
; VI-NEXT: s_mov_b32 s22, s10
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s8, s0
; VI-NEXT: s_mov_b32 s9, s1
; VI-NEXT: s_mov_b32 s22, s10
; VI-NEXT: s_mov_b32 s23, s11
; VI-NEXT: s_mov_b32 s12, s2
; VI-NEXT: s_mov_b32 s13, s3

@@ -7,21 +7,27 @@ define protected amdgpu_kernel void @sccClobber(ptr addrspace(1) %a, ptr addrspa
; RRLIST-LABEL: sccClobber:
; RRLIST: ; %bb.0: ; %entry
; RRLIST-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24
; RRLIST-NEXT: v_mov_b32_e32 v2, 0
; RRLIST-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; RRLIST-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; RRLIST-NEXT: s_mov_b32 s22, -1
; RRLIST-NEXT: s_mov_b32 s23, 0xe00000
; RRLIST-NEXT: s_add_u32 s20, s20, s3
; RRLIST-NEXT: s_waitcnt lgkmcnt(0)
; RRLIST-NEXT: s_load_dword s16, s[8:9], 0x0
; RRLIST-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
; RRLIST-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0
; RRLIST-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x44
; RRLIST-NEXT: s_load_dword s17, s[10:11], 0x0
; RRLIST-NEXT: s_addc_u32 s21, s21, 0
; RRLIST-NEXT: s_waitcnt lgkmcnt(0)
; RRLIST-NEXT: s_min_i32 s4, s16, 0
; RRLIST-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; RRLIST-NEXT: s_min_i32 s4, s16, 0
; RRLIST-NEXT: v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
; RRLIST-NEXT: s_and_b64 s[0:1], vcc, exec
; RRLIST-NEXT: s_cselect_b32 s0, s16, s17
; RRLIST-NEXT: s_cmp_eq_u64 s[12:13], s[2:3]
; RRLIST-NEXT: s_cselect_b32 s0, s4, s0
; RRLIST-NEXT: v_mov_b32_e32 v2, 0
; RRLIST-NEXT: v_mov_b32_e32 v0, s0
; RRLIST-NEXT: global_store_dword v2, v0, s[14:15]
; RRLIST-NEXT: s_endpgm
@@ -29,21 +35,27 @@ define protected amdgpu_kernel void @sccClobber(ptr addrspace(1) %a, ptr addrspa
; FAST-LABEL: sccClobber:
; FAST: ; %bb.0: ; %entry
; FAST-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24
; FAST-NEXT: v_mov_b32_e32 v2, 0
; FAST-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; FAST-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; FAST-NEXT: s_mov_b32 s22, -1
; FAST-NEXT: s_mov_b32 s23, 0xe00000
; FAST-NEXT: s_add_u32 s20, s20, s3
; FAST-NEXT: s_waitcnt lgkmcnt(0)
; FAST-NEXT: s_load_dword s16, s[8:9], 0x0
; FAST-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
; FAST-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0
; FAST-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x44
; FAST-NEXT: s_load_dword s17, s[10:11], 0x0
; FAST-NEXT: s_addc_u32 s21, s21, 0
; FAST-NEXT: s_waitcnt lgkmcnt(0)
; FAST-NEXT: s_min_i32 s4, s16, 0
; FAST-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; FAST-NEXT: s_min_i32 s4, s16, 0
; FAST-NEXT: v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
; FAST-NEXT: s_and_b64 s[0:1], vcc, exec
; FAST-NEXT: s_cselect_b32 s0, s16, s17
; FAST-NEXT: s_cmp_eq_u64 s[12:13], s[2:3]
; FAST-NEXT: s_cselect_b32 s0, s4, s0
; FAST-NEXT: v_mov_b32_e32 v2, 0
; FAST-NEXT: v_mov_b32_e32 v0, s0
; FAST-NEXT: global_store_dword v2, v0, s[14:15]
; FAST-NEXT: s_endpgm

@@ -310,7 +310,7 @@ define <4 x i64> @v4i64_func_void() #0 {
; GCN-LABEL: {{^}}v5i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx2 v[8:9], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i64> @v5i64_func_void() #0 {

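Note: <5 x i64> is 320 bits (10 dwords), so with the new 320-bit classes the return value loads as 4 + 4 + 2 dwords; the final buffer_load_dwordx2 replaces a third dwordx4 that previously padded the value out to 12 dwords. A sketch of the covered IR (body assumed; only the label is visible above):

define <5 x i64> @v5i64_func_void() {
  ; 320-bit load: previously rounded up, now split 128 + 128 + 64
  %val = load <5 x i64>, <5 x i64> addrspace(1)* undef
  ret <5 x i64> %val
}
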
@@ -385,22 +385,19 @@ bb7: ; preds = %bb4, %bb1
; GCN: s_load_dword [[ARG:s[0-9]+]]

; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
; MOVREL: s_waitcnt
; MOVREL: s_add_i32 m0, [[ARG]], -16
; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, 4.0
; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, -4.0
; MOVREL: s_mov_b32 m0, -1


; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
; IDXMODE: s_waitcnt
; IDXMODE: s_add_i32 [[ARG]], [[ARG]], -16
; IDXMODE: s_set_gpr_idx_on [[ARG]], gpr_idx(DST)
; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; IDXMODE: s_set_gpr_idx_off
; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
; IDXMODE: s_set_gpr_idx_on [[ARG]], gpr_idx(DST)
; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, -4.0
; IDXMODE: s_set_gpr_idx_off

@@ -610,10 +610,16 @@ entry:
define amdgpu_kernel void @double5_inselt(<5 x double> addrspace(1)* %out, <5 x double> %vec, i32 %sel) {
; GCN-LABEL: double5_inselt:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s16, SCRATCH_RSRC_DWORD0
; GCN-NEXT: s_mov_b32 s17, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s18, -1
; GCN-NEXT: s_mov_b32 s19, 0xe80000
; GCN-NEXT: s_add_u32 s16, s16, s3
; GCN-NEXT: s_load_dword s12, s[0:1], 0xa4
; GCN-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x84
; GCN-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x64
; GCN-NEXT: s_addc_u32 s17, s17, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_eq_u32 s12, 4
; GCN-NEXT: s_cselect_b32 s9, 0x3ff00000, s9
@@ -622,10 +628,8 @@ define amdgpu_kernel void @double5_inselt(<5 x double> addrspace(1)* %out, <5 x
; GCN-NEXT: s_cselect_b32 s3, 0x3ff00000, s3
; GCN-NEXT: s_cselect_b32 s2, 0, s2
; GCN-NEXT: s_cmp_eq_u32 s12, 0
; GCN-NEXT: v_mov_b32_e32 v4, s8
; GCN-NEXT: v_mov_b32_e32 v5, s9
; GCN-NEXT: s_cselect_b32 s8, 0x3ff00000, s1
; GCN-NEXT: s_cselect_b32 s9, 0, s0
; GCN-NEXT: s_cselect_b32 s13, 0x3ff00000, s1
; GCN-NEXT: s_cselect_b32 s14, 0, s0
; GCN-NEXT: s_cmp_eq_u32 s12, 3
; GCN-NEXT: s_cselect_b32 s0, 0x3ff00000, s7
; GCN-NEXT: s_cselect_b32 s1, 0, s6
@@ -636,23 +640,26 @@ define amdgpu_kernel void @double5_inselt(<5 x double> addrspace(1)* %out, <5 x
; GCN-NEXT: s_add_u32 s0, s10, 16
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: s_addc_u32 s1, s11, 0
; GCN-NEXT: v_mov_b32_e32 v7, s1
; GCN-NEXT: v_mov_b32_e32 v5, s1
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: v_mov_b32_e32 v6, s0
; GCN-NEXT: flat_store_dwordx4 v[6:7], v[0:3]
; GCN-NEXT: v_mov_b32_e32 v6, s10
; GCN-NEXT: v_mov_b32_e32 v0, s9
; GCN-NEXT: v_mov_b32_e32 v1, s8
; GCN-NEXT: v_mov_b32_e32 v4, s0
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: v_mov_b32_e32 v4, s10
; GCN-NEXT: s_add_u32 s0, s10, 32
; GCN-NEXT: v_mov_b32_e32 v0, s14
; GCN-NEXT: v_mov_b32_e32 v1, s13
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v3, s3
; GCN-NEXT: v_mov_b32_e32 v7, s11
; GCN-NEXT: s_add_u32 s0, s10, 32
; GCN-NEXT: flat_store_dwordx4 v[6:7], v[0:3]
; GCN-NEXT: v_mov_b32_e32 v5, s11
; GCN-NEXT: s_addc_u32 s1, s11, 0
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: v_mov_b32_e32 v1, s1
; GCN-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_nop 0
; GCN-NEXT: v_mov_b32_e32 v3, s1
; GCN-NEXT: v_mov_b32_e32 v0, s8
; GCN-NEXT: v_mov_b32_e32 v1, s9
; GCN-NEXT: v_mov_b32_e32 v2, s0
; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN-NEXT: s_endpgm
entry:
%v = insertelement <5 x double> %vec, double 1.000000e+00, i32 %sel

@@ -6,7 +6,7 @@
; FIXME: For some reason the 8 and 16 vectors are being stored as
; individual elements instead of 128-bit stores.

define amdgpu_kernel void @insertelement_v2f32_0(<2 x float> addrspace(1)* %out, <2 x float> %a) nounwind {
define amdgpu_kernel void @insertelement_v2f32_0(ptr addrspace(1) %out, <2 x float> %a) nounwind {
; SI-LABEL: insertelement_v2f32_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -37,7 +37,7 @@ define amdgpu_kernel void @insertelement_v2f32_0(<2 x float> addrspace(1)* %out,
ret void
}

define amdgpu_kernel void @insertelement_v2f32_1(<2 x float> addrspace(1)* %out, <2 x float> %a) nounwind {
define amdgpu_kernel void @insertelement_v2f32_1(ptr addrspace(1) %out, <2 x float> %a) nounwind {
; SI-LABEL: insertelement_v2f32_1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -68,7 +68,7 @@ define amdgpu_kernel void @insertelement_v2f32_1(<2 x float> addrspace(1)* %out,
ret void
}

define amdgpu_kernel void @insertelement_v2i32_0(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
define amdgpu_kernel void @insertelement_v2i32_0(ptr addrspace(1) %out, <2 x i32> %a) nounwind {
; SI-LABEL: insertelement_v2i32_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -99,7 +99,7 @@ define amdgpu_kernel void @insertelement_v2i32_0(<2 x i32> addrspace(1)* %out, <
ret void
}

define amdgpu_kernel void @insertelement_v2i32_1(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
define amdgpu_kernel void @insertelement_v2i32_1(ptr addrspace(1) %out, <2 x i32> %a) nounwind {
; SI-LABEL: insertelement_v2i32_1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -132,7 +132,7 @@ define amdgpu_kernel void @insertelement_v2i32_1(<2 x i32> addrspace(1)* %out, <

; FIXME: Why is the constant moved into the intermediate register and
; not just directly into the vector component?
define amdgpu_kernel void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
define amdgpu_kernel void @insertelement_v4f32_0(ptr addrspace(1) %out, <4 x float> %a) nounwind {
; SI-LABEL: insertelement_v4f32_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -167,7 +167,7 @@ define amdgpu_kernel void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out,
ret void
}

define amdgpu_kernel void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
define amdgpu_kernel void @insertelement_v4f32_1(ptr addrspace(1) %out, <4 x float> %a) nounwind {
; SI-LABEL: insertelement_v4f32_1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -202,7 +202,7 @@ define amdgpu_kernel void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out,
ret void
}

define amdgpu_kernel void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
define amdgpu_kernel void @insertelement_v4f32_2(ptr addrspace(1) %out, <4 x float> %a) nounwind {
; SI-LABEL: insertelement_v4f32_2:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -237,7 +237,7 @@ define amdgpu_kernel void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out,
ret void
}

define amdgpu_kernel void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
define amdgpu_kernel void @insertelement_v4f32_3(ptr addrspace(1) %out, <4 x float> %a) nounwind {
; SI-LABEL: insertelement_v4f32_3:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -272,7 +272,7 @@ define amdgpu_kernel void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out,
ret void
}

define amdgpu_kernel void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind {
define amdgpu_kernel void @insertelement_v4i32_0(ptr addrspace(1) %out, <4 x i32> %a) nounwind {
; SI-LABEL: insertelement_v4i32_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -307,7 +307,7 @@ define amdgpu_kernel void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <
ret void
}

define amdgpu_kernel void @insertelement_v3f32_1(<3 x float> addrspace(1)* %out, <3 x float> %a) nounwind {
define amdgpu_kernel void @insertelement_v3f32_1(ptr addrspace(1) %out, <3 x float> %a) nounwind {
; SI-LABEL: insertelement_v3f32_1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -338,7 +338,7 @@ define amdgpu_kernel void @insertelement_v3f32_1(<3 x float> addrspace(1)* %out,
ret void
}

define amdgpu_kernel void @insertelement_v3f32_2(<3 x float> addrspace(1)* %out, <3 x float> %a) nounwind {
define amdgpu_kernel void @insertelement_v3f32_2(ptr addrspace(1) %out, <3 x float> %a) nounwind {
; SI-LABEL: insertelement_v3f32_2:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -369,7 +369,7 @@ define amdgpu_kernel void @insertelement_v3f32_2(<3 x float> addrspace(1)* %out,
ret void
}

define amdgpu_kernel void @insertelement_v3f32_3(<3 x float> addrspace(1)* %out, <3 x float> %a) nounwind {
define amdgpu_kernel void @insertelement_v3f32_3(ptr addrspace(1) %out, <3 x float> %a) nounwind {
; GCN-LABEL: insertelement_v3f32_3:
; GCN: ; %bb.0:
; GCN-NEXT: s_endpgm
@@ -394,7 +394,107 @@ define <4 x float> @insertelement_to_sgpr() nounwind {
ret <4 x float> %tmp2
}

define amdgpu_kernel void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
define <9 x float> @insertelement_to_v9f32_undef() nounwind {
; GCN-LABEL: insertelement_to_v9f32_undef:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v0, 0x40a00000
; GCN-NEXT: v_mov_b32_e32 v2, 0xc0a00000
; GCN-NEXT: v_mov_b32_e32 v7, 0x41880000
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: v_mov_b32_e32 v3, s7
; GCN-NEXT: v_mov_b32_e32 v4, s8
; GCN-NEXT: v_mov_b32_e32 v5, s9
; GCN-NEXT: v_mov_b32_e32 v6, s10
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v8, s4
; GCN-NEXT: s_setpc_b64 s[30:31]
%tmp = load <9 x float>, <9 x float> addrspace(4)* undef
%tmp1 = insertelement <9 x float> %tmp, float 5.000, i32 0
%tmp2 = insertelement <9 x float> %tmp1, float -5.000, i32 2
%tmp3 = insertelement <9 x float> %tmp2, float 17.000, i32 7
ret <9 x float> %tmp3
}

define <10 x float> @insertelement_to_v10f32_undef() nounwind {
; GCN-LABEL: insertelement_to_v10f32_undef:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v0, 2.0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: v_mov_b32_e32 v2, s6
; GCN-NEXT: v_mov_b32_e32 v3, s7
; GCN-NEXT: v_mov_b32_e32 v4, s8
; GCN-NEXT: v_mov_b32_e32 v5, s9
; GCN-NEXT: v_mov_b32_e32 v6, s10
; GCN-NEXT: v_mov_b32_e32 v7, s11
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v8, s12
; GCN-NEXT: v_mov_b32_e32 v9, s13
; GCN-NEXT: s_setpc_b64 s[30:31]
%tmp = load <10 x float>, <10 x float> addrspace(4)* undef
%tmp1 = insertelement <10 x float> %tmp, float 2.0, i32 0
ret <10 x float> %tmp1
}

define <11 x float> @insertelement_to_v11f32_undef() nounwind {
; GCN-LABEL: insertelement_to_v11f32_undef:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v0, 1.0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: v_mov_b32_e32 v2, s6
; GCN-NEXT: v_mov_b32_e32 v3, s7
; GCN-NEXT: v_mov_b32_e32 v4, s8
; GCN-NEXT: v_mov_b32_e32 v5, s9
; GCN-NEXT: v_mov_b32_e32 v6, s10
; GCN-NEXT: v_mov_b32_e32 v7, s11
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v8, s12
; GCN-NEXT: v_mov_b32_e32 v9, s13
; GCN-NEXT: v_mov_b32_e32 v10, s14
; GCN-NEXT: s_setpc_b64 s[30:31]
%tmp = load <11 x float>, <11 x float> addrspace(4)* undef
%tmp1 = insertelement <11 x float> %tmp, float 1.000, i32 0
ret <11 x float> %tmp1
}

define <12 x float> @insertelement_to_v12f32_undef() nounwind {
; GCN-LABEL: insertelement_to_v12f32_undef:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v0, 4.0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: v_mov_b32_e32 v2, s6
; GCN-NEXT: v_mov_b32_e32 v3, s7
; GCN-NEXT: v_mov_b32_e32 v4, s8
; GCN-NEXT: v_mov_b32_e32 v5, s9
; GCN-NEXT: v_mov_b32_e32 v6, s10
; GCN-NEXT: v_mov_b32_e32 v7, s11
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v8, s12
; GCN-NEXT: v_mov_b32_e32 v9, s13
; GCN-NEXT: v_mov_b32_e32 v10, s14
; GCN-NEXT: v_mov_b32_e32 v11, s15
; GCN-NEXT: s_setpc_b64 s[30:31]
%tmp = load <12 x float>, <12 x float> addrspace(4)* undef
%tmp1 = insertelement <12 x float> %tmp, float 4.0, i32 0
ret <12 x float> %tmp1
}

define amdgpu_kernel void @dynamic_insertelement_v2f32(ptr addrspace(1) %out, <2 x float> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v2f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -441,7 +541,7 @@ define amdgpu_kernel void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)*
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v3f32(ptr addrspace(1) %out, <3 x float> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v3f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s8, s[4:5], 0x8
@@ -494,7 +594,7 @@ define amdgpu_kernel void @dynamic_insertelement_v3f32(<3 x float> addrspace(1)*
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v4f32(ptr addrspace(1) %out, <4 x float> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v4f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s8, s[4:5], 0x8
@@ -555,7 +655,7 @@ define amdgpu_kernel void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)*
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v8f32(ptr addrspace(1) %out, <8 x float> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v8f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -606,7 +706,249 @@ define amdgpu_kernel void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)*
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v9f32(ptr addrspace(1) %out, <9 x float> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v9f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
; SI-NEXT: s_load_dword s6, s[4:5], 0x18
; SI-NEXT: s_load_dword s4, s[4:5], 0x20
; SI-NEXT: v_mov_b32_e32 v9, 0x40a00000
; SI-NEXT: s_mov_b32 s3, 0x100f000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s8
; SI-NEXT: v_mov_b32_e32 v1, s9
; SI-NEXT: v_mov_b32_e32 v2, s10
; SI-NEXT: v_mov_b32_e32 v3, s11
; SI-NEXT: v_mov_b32_e32 v4, s12
; SI-NEXT: v_mov_b32_e32 v5, s13
; SI-NEXT: v_mov_b32_e32 v6, s14
; SI-NEXT: v_mov_b32_e32 v7, s15
; SI-NEXT: v_mov_b32_e32 v8, s6
; SI-NEXT: s_mov_b32 m0, s4
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_movreld_b32_e32 v0, v9
; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:32
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: dynamic_insertelement_v9f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: s_load_dword s6, s[4:5], 0x60
; VI-NEXT: s_load_dword s4, s[4:5], 0x80
; VI-NEXT: v_mov_b32_e32 v9, 0x40a00000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: v_mov_b32_e32 v1, s9
; VI-NEXT: v_mov_b32_e32 v2, s10
; VI-NEXT: v_mov_b32_e32 v3, s11
; VI-NEXT: v_mov_b32_e32 v4, s12
; VI-NEXT: v_mov_b32_e32 v5, s13
; VI-NEXT: v_mov_b32_e32 v6, s14
; VI-NEXT: v_mov_b32_e32 v7, s15
; VI-NEXT: v_mov_b32_e32 v8, s6
; VI-NEXT: s_mov_b32 m0, s4
; VI-NEXT: s_mov_b32 s3, 0x1100f000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_movreld_b32_e32 v0, v9
; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:32
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: s_endpgm
%vecins = insertelement <9 x float> %a, float 5.000000e+00, i32 %b
store <9 x float> %vecins, <9 x float> addrspace(1)* %out, align 32
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v10f32(ptr addrspace(1) %out, <10 x float> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v10f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x18
; SI-NEXT: s_load_dword s4, s[4:5], 0x20
; SI-NEXT: v_mov_b32_e32 v10, 0x40a00000
; SI-NEXT: s_mov_b32 s3, 0x100f000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s8
; SI-NEXT: v_mov_b32_e32 v1, s9
; SI-NEXT: v_mov_b32_e32 v2, s10
; SI-NEXT: v_mov_b32_e32 v3, s11
; SI-NEXT: v_mov_b32_e32 v4, s12
; SI-NEXT: v_mov_b32_e32 v5, s13
; SI-NEXT: v_mov_b32_e32 v6, s14
; SI-NEXT: v_mov_b32_e32 v7, s15
; SI-NEXT: v_mov_b32_e32 v8, s6
; SI-NEXT: v_mov_b32_e32 v9, s7
; SI-NEXT: s_mov_b32 m0, s4
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_movreld_b32_e32 v0, v10
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: buffer_store_dwordx2 v[8:9], off, s[0:3], 0 offset:32
; SI-NEXT: s_endpgm
;
; VI-LABEL: dynamic_insertelement_v10f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x60
; VI-NEXT: s_load_dword s4, s[4:5], 0x80
; VI-NEXT: v_mov_b32_e32 v10, 0x40a00000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: v_mov_b32_e32 v1, s9
; VI-NEXT: v_mov_b32_e32 v2, s10
; VI-NEXT: v_mov_b32_e32 v3, s11
; VI-NEXT: v_mov_b32_e32 v4, s12
; VI-NEXT: v_mov_b32_e32 v5, s13
; VI-NEXT: v_mov_b32_e32 v6, s14
; VI-NEXT: v_mov_b32_e32 v7, s15
; VI-NEXT: v_mov_b32_e32 v8, s6
; VI-NEXT: v_mov_b32_e32 v9, s7
; VI-NEXT: s_mov_b32 m0, s4
; VI-NEXT: s_mov_b32 s3, 0x1100f000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_movreld_b32_e32 v0, v10
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: buffer_store_dwordx2 v[8:9], off, s[0:3], 0 offset:32
; VI-NEXT: s_endpgm
%vecins = insertelement <10 x float> %a, float 5.000000e+00, i32 %b
store <10 x float> %vecins, <10 x float> addrspace(1)* %out, align 32
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v11f32(ptr addrspace(1) %out, <11 x float> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v11f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
; SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x18
; SI-NEXT: s_load_dword s4, s[4:5], 0x20
; SI-NEXT: v_mov_b32_e32 v11, 0x40a00000
; SI-NEXT: s_mov_b32 s3, 0x100f000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s8
; SI-NEXT: v_mov_b32_e32 v1, s9
; SI-NEXT: v_mov_b32_e32 v2, s10
; SI-NEXT: v_mov_b32_e32 v3, s11
; SI-NEXT: v_mov_b32_e32 v4, s12
; SI-NEXT: v_mov_b32_e32 v5, s13
; SI-NEXT: v_mov_b32_e32 v6, s14
; SI-NEXT: v_mov_b32_e32 v7, s15
; SI-NEXT: v_mov_b32_e32 v8, s16
; SI-NEXT: v_mov_b32_e32 v9, s17
; SI-NEXT: v_mov_b32_e32 v10, s18
; SI-NEXT: s_mov_b32 m0, s4
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_movreld_b32_e32 v0, v11
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: buffer_store_dwordx3 v[8:10], off, s[0:3], 0 offset:32
; SI-NEXT: s_endpgm
;
; VI-LABEL: dynamic_insertelement_v11f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: v_mov_b32_e32 v11, 0x40a00000
; VI-NEXT: s_mov_b32 s3, 0x1100f000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: v_mov_b32_e32 v1, s9
; VI-NEXT: v_mov_b32_e32 v2, s10
; VI-NEXT: v_mov_b32_e32 v3, s11
; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x60
; VI-NEXT: s_load_dword s4, s[4:5], 0x80
; VI-NEXT: v_mov_b32_e32 v4, s12
; VI-NEXT: v_mov_b32_e32 v5, s13
; VI-NEXT: v_mov_b32_e32 v6, s14
; VI-NEXT: v_mov_b32_e32 v7, s15
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v8, s8
; VI-NEXT: v_mov_b32_e32 v9, s9
; VI-NEXT: v_mov_b32_e32 v10, s10
; VI-NEXT: s_mov_b32 m0, s4
; VI-NEXT: v_movreld_b32_e32 v0, v11
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: buffer_store_dwordx3 v[8:10], off, s[0:3], 0 offset:32
; VI-NEXT: s_endpgm
%vecins = insertelement <11 x float> %a, float 5.000000e+00, i32 %b
store <11 x float> %vecins, <11 x float> addrspace(1)* %out, align 32
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v12f32(ptr addrspace(1) %out, <12 x float> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v12f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
; SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x18
; SI-NEXT: s_load_dword s4, s[4:5], 0x20
; SI-NEXT: v_mov_b32_e32 v12, 0x40a00000
; SI-NEXT: s_mov_b32 s3, 0x100f000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s8
; SI-NEXT: v_mov_b32_e32 v1, s9
; SI-NEXT: v_mov_b32_e32 v2, s10
; SI-NEXT: v_mov_b32_e32 v3, s11
; SI-NEXT: v_mov_b32_e32 v4, s12
; SI-NEXT: v_mov_b32_e32 v5, s13
; SI-NEXT: v_mov_b32_e32 v6, s14
; SI-NEXT: v_mov_b32_e32 v7, s15
; SI-NEXT: v_mov_b32_e32 v8, s16
; SI-NEXT: v_mov_b32_e32 v9, s17
; SI-NEXT: v_mov_b32_e32 v10, s18
; SI-NEXT: v_mov_b32_e32 v11, s19
; SI-NEXT: s_mov_b32 m0, s4
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_movreld_b32_e32 v0, v12
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; SI-NEXT: s_endpgm
;
; VI-LABEL: dynamic_insertelement_v12f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: v_mov_b32_e32 v12, 0x40a00000
; VI-NEXT: s_mov_b32 s3, 0x1100f000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: v_mov_b32_e32 v1, s9
; VI-NEXT: v_mov_b32_e32 v2, s10
; VI-NEXT: v_mov_b32_e32 v3, s11
; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x60
; VI-NEXT: s_load_dword s4, s[4:5], 0x80
; VI-NEXT: v_mov_b32_e32 v4, s12
; VI-NEXT: v_mov_b32_e32 v5, s13
; VI-NEXT: v_mov_b32_e32 v6, s14
; VI-NEXT: v_mov_b32_e32 v7, s15
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v8, s8
; VI-NEXT: v_mov_b32_e32 v9, s9
; VI-NEXT: v_mov_b32_e32 v10, s10
; VI-NEXT: v_mov_b32_e32 v11, s11
; VI-NEXT: s_mov_b32 m0, s4
; VI-NEXT: v_movreld_b32_e32 v0, v12
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; VI-NEXT: s_endpgm
%vecins = insertelement <12 x float> %a, float 5.000000e+00, i32 %b
store <12 x float> %vecins, <12 x float> addrspace(1)* %out, align 32
ret void
}

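Note on the new dynamic_insertelement_v9/v10/v11/v12 tests above: the 288-, 320-, 352- and 384-bit vectors store as 4 + 4 + {1, 2, 3, 4} dwords (buffer_store_dword, _dwordx2, _dwordx3, _dwordx4 for the tail) rather than widening to 512 bits. A reduced sketch mirroring the test pattern (hypothetical function name):

define amdgpu_kernel void @store_v9f32(ptr addrspace(1) %out, <9 x float> %a) {
  ; 288-bit store splits into dwordx4 + dwordx4 + dword
  store <9 x float> %a, ptr addrspace(1) %out, align 32
  ret void
}
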
define amdgpu_kernel void @dynamic_insertelement_v16f32(ptr addrspace(1) %out, <16 x float> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v16f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -677,7 +1019,7 @@ define amdgpu_kernel void @dynamic_insertelement_v16f32(<16 x float> addrspace(1
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v2i32(ptr addrspace(1) %out, <2 x i32> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -718,7 +1060,7 @@ define amdgpu_kernel void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v3i32(ptr addrspace(1) %out, <3 x i32> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v3i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s8, s[4:5], 0x8
@@ -763,7 +1105,7 @@ define amdgpu_kernel void @dynamic_insertelement_v3i32(<3 x i32> addrspace(1)* %
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b, [8 x i32], i32 %val) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v4i32(ptr addrspace(1) %out, <4 x i32> %a, i32 %b, [8 x i32], i32 %val) nounwind {
; SI-LABEL: dynamic_insertelement_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -816,7 +1158,7 @@ define amdgpu_kernel void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v8i32(ptr addrspace(1) %out, <8 x i32> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v8i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x8
@@ -865,7 +1207,241 @@ define amdgpu_kernel void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v9i32(ptr addrspace(1) %out, <9 x i32> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v9i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
; SI-NEXT: s_load_dword s6, s[4:5], 0x18
; SI-NEXT: s_load_dword s4, s[4:5], 0x20
; SI-NEXT: s_mov_b32 s3, 0x100f000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s8
; SI-NEXT: v_mov_b32_e32 v1, s9
; SI-NEXT: v_mov_b32_e32 v2, s10
; SI-NEXT: v_mov_b32_e32 v3, s11
; SI-NEXT: v_mov_b32_e32 v4, s12
; SI-NEXT: v_mov_b32_e32 v5, s13
; SI-NEXT: v_mov_b32_e32 v6, s14
; SI-NEXT: v_mov_b32_e32 v7, s15
; SI-NEXT: v_mov_b32_e32 v8, s6
; SI-NEXT: s_mov_b32 m0, s4
; SI-NEXT: v_movreld_b32_e32 v0, 5
; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:32
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: dynamic_insertelement_v9i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: s_load_dword s6, s[4:5], 0x60
; VI-NEXT: s_load_dword s4, s[4:5], 0x80
; VI-NEXT: s_mov_b32 s3, 0x1100f000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: v_mov_b32_e32 v1, s9
; VI-NEXT: v_mov_b32_e32 v2, s10
; VI-NEXT: v_mov_b32_e32 v3, s11
; VI-NEXT: v_mov_b32_e32 v4, s12
; VI-NEXT: v_mov_b32_e32 v5, s13
; VI-NEXT: v_mov_b32_e32 v6, s14
; VI-NEXT: v_mov_b32_e32 v7, s15
; VI-NEXT: v_mov_b32_e32 v8, s6
; VI-NEXT: s_mov_b32 m0, s4
; VI-NEXT: v_movreld_b32_e32 v0, 5
; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:32
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: s_endpgm
%vecins = insertelement <9 x i32> %a, i32 5, i32 %b
store <9 x i32> %vecins, <9 x i32> addrspace(1)* %out, align 32
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v10i32(ptr addrspace(1) %out, <10 x i32> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v10i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x18
; SI-NEXT: s_load_dword s4, s[4:5], 0x20
; SI-NEXT: s_mov_b32 s3, 0x100f000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s8
; SI-NEXT: v_mov_b32_e32 v1, s9
; SI-NEXT: v_mov_b32_e32 v2, s10
; SI-NEXT: v_mov_b32_e32 v3, s11
; SI-NEXT: v_mov_b32_e32 v4, s12
; SI-NEXT: v_mov_b32_e32 v5, s13
; SI-NEXT: v_mov_b32_e32 v6, s14
; SI-NEXT: v_mov_b32_e32 v7, s15
; SI-NEXT: v_mov_b32_e32 v8, s6
; SI-NEXT: v_mov_b32_e32 v9, s7
; SI-NEXT: s_mov_b32 m0, s4
; SI-NEXT: v_movreld_b32_e32 v0, 5
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: buffer_store_dwordx2 v[8:9], off, s[0:3], 0 offset:32
; SI-NEXT: s_endpgm
;
; VI-LABEL: dynamic_insertelement_v10i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x60
; VI-NEXT: s_load_dword s4, s[4:5], 0x80
; VI-NEXT: s_mov_b32 s3, 0x1100f000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: v_mov_b32_e32 v1, s9
; VI-NEXT: v_mov_b32_e32 v2, s10
; VI-NEXT: v_mov_b32_e32 v3, s11
; VI-NEXT: v_mov_b32_e32 v4, s12
; VI-NEXT: v_mov_b32_e32 v5, s13
; VI-NEXT: v_mov_b32_e32 v6, s14
; VI-NEXT: v_mov_b32_e32 v7, s15
; VI-NEXT: v_mov_b32_e32 v8, s6
; VI-NEXT: v_mov_b32_e32 v9, s7
; VI-NEXT: s_mov_b32 m0, s4
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_movreld_b32_e32 v0, 5
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: buffer_store_dwordx2 v[8:9], off, s[0:3], 0 offset:32
; VI-NEXT: s_endpgm
%vecins = insertelement <10 x i32> %a, i32 5, i32 %b
store <10 x i32> %vecins, <10 x i32> addrspace(1)* %out, align 32
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v11i32(ptr addrspace(1) %out, <11 x i32> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v11i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
; SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x18
; SI-NEXT: s_load_dword s4, s[4:5], 0x20
; SI-NEXT: s_mov_b32 s3, 0x100f000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s8
; SI-NEXT: v_mov_b32_e32 v1, s9
; SI-NEXT: v_mov_b32_e32 v2, s10
; SI-NEXT: v_mov_b32_e32 v3, s11
; SI-NEXT: v_mov_b32_e32 v4, s12
; SI-NEXT: v_mov_b32_e32 v5, s13
; SI-NEXT: v_mov_b32_e32 v6, s14
; SI-NEXT: v_mov_b32_e32 v7, s15
; SI-NEXT: v_mov_b32_e32 v8, s16
; SI-NEXT: v_mov_b32_e32 v9, s17
; SI-NEXT: v_mov_b32_e32 v10, s18
; SI-NEXT: s_mov_b32 m0, s4
; SI-NEXT: v_movreld_b32_e32 v0, 5
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: buffer_store_dwordx3 v[8:10], off, s[0:3], 0 offset:32
; SI-NEXT: s_endpgm
;
; VI-LABEL: dynamic_insertelement_v11i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: s_mov_b32 s3, 0x1100f000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: v_mov_b32_e32 v1, s9
; VI-NEXT: v_mov_b32_e32 v2, s10
; VI-NEXT: v_mov_b32_e32 v3, s11
; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x60
; VI-NEXT: s_load_dword s4, s[4:5], 0x80
; VI-NEXT: v_mov_b32_e32 v4, s12
; VI-NEXT: v_mov_b32_e32 v5, s13
; VI-NEXT: v_mov_b32_e32 v6, s14
; VI-NEXT: v_mov_b32_e32 v7, s15
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v8, s8
; VI-NEXT: v_mov_b32_e32 v9, s9
; VI-NEXT: v_mov_b32_e32 v10, s10
; VI-NEXT: s_mov_b32 m0, s4
; VI-NEXT: v_movreld_b32_e32 v0, 5
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: buffer_store_dwordx3 v[8:10], off, s[0:3], 0 offset:32
; VI-NEXT: s_endpgm
%vecins = insertelement <11 x i32> %a, i32 5, i32 %b
store <11 x i32> %vecins, <11 x i32> addrspace(1)* %out, align 32
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v12i32(ptr addrspace(1) %out, <12 x i32> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v12i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10
; SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x18
; SI-NEXT: s_load_dword s4, s[4:5], 0x20
; SI-NEXT: s_mov_b32 s3, 0x100f000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s8
; SI-NEXT: v_mov_b32_e32 v1, s9
; SI-NEXT: v_mov_b32_e32 v2, s10
; SI-NEXT: v_mov_b32_e32 v3, s11
; SI-NEXT: v_mov_b32_e32 v4, s12
; SI-NEXT: v_mov_b32_e32 v5, s13
; SI-NEXT: v_mov_b32_e32 v6, s14
; SI-NEXT: v_mov_b32_e32 v7, s15
; SI-NEXT: v_mov_b32_e32 v8, s16
; SI-NEXT: v_mov_b32_e32 v9, s17
; SI-NEXT: v_mov_b32_e32 v10, s18
; SI-NEXT: v_mov_b32_e32 v11, s19
; SI-NEXT: s_mov_b32 m0, s4
; SI-NEXT: v_movreld_b32_e32 v0, 5
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; SI-NEXT: s_endpgm
;
; VI-LABEL: dynamic_insertelement_v12i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: s_mov_b32 s3, 0x1100f000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: v_mov_b32_e32 v1, s9
; VI-NEXT: v_mov_b32_e32 v2, s10
; VI-NEXT: v_mov_b32_e32 v3, s11
; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x60
; VI-NEXT: s_load_dword s4, s[4:5], 0x80
; VI-NEXT: v_mov_b32_e32 v4, s12
; VI-NEXT: v_mov_b32_e32 v5, s13
; VI-NEXT: v_mov_b32_e32 v6, s14
; VI-NEXT: v_mov_b32_e32 v7, s15
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v8, s8
; VI-NEXT: v_mov_b32_e32 v9, s9
; VI-NEXT: v_mov_b32_e32 v10, s10
; VI-NEXT: v_mov_b32_e32 v11, s11
; VI-NEXT: s_mov_b32 m0, s4
; VI-NEXT: v_movreld_b32_e32 v0, 5
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; VI-NEXT: s_endpgm
%vecins = insertelement <12 x i32> %a, i32 5, i32 %b
store <12 x i32> %vecins, <12 x i32> addrspace(1)* %out, align 32
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v16i32(ptr addrspace(1) %out, <16 x i32> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v16i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x10
@ -934,7 +1510,7 @@ define amdgpu_kernel void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)*
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v2i16(ptr addrspace(1) %out, <2 x i16> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v2i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@ -973,7 +1549,7 @@ define amdgpu_kernel void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v3i16(ptr addrspace(1) %out, <3 x i16> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v3i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@ -1023,7 +1599,7 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* %
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, [8 x i32], <2 x i8> %a, [8 x i32], i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v2i8(ptr addrspace(1) %out, [8 x i32], <2 x i8> %a, [8 x i32], i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v2i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[4:5], 0x13
@ -1065,7 +1641,7 @@ define amdgpu_kernel void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %ou

; FIXME: post legalize i16 and i32 shifts aren't merged because of
; isTypeDesirableForOp in SimplifyDemandedBits
define amdgpu_kernel void @dynamic_insertelement_v3i8(<3 x i8> addrspace(1)* %out, [8 x i32], <3 x i8> %a, [8 x i32], i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v3i8(ptr addrspace(1) %out, [8 x i32], <3 x i8> %a, [8 x i32], i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v3i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[4:5], 0x13
@ -1110,7 +1686,7 @@ define amdgpu_kernel void @dynamic_insertelement_v3i8(<3 x i8> addrspace(1)* %ou
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, [8 x i32], <4 x i8> %a, [8 x i32], i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v4i8(ptr addrspace(1) %out, [8 x i32], <4 x i8> %a, [8 x i32], i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v4i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[4:5], 0x13
@ -1149,7 +1725,7 @@ define amdgpu_kernel void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %ou
ret void
}

define amdgpu_kernel void @s_dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(4)* %a.ptr, i32 %b) nounwind {
define amdgpu_kernel void @s_dynamic_insertelement_v8i8(ptr addrspace(1) %out, <8 x i8> addrspace(4)* %a.ptr, i32 %b) nounwind {
; SI-LABEL: s_dynamic_insertelement_v8i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@ -1201,7 +1777,7 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v16i8(ptr addrspace(1) %out, <16 x i8> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v16i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x4
@ -1410,24 +1986,24 @@ define amdgpu_kernel void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %

; This test requires handling INSERT_SUBREG in SIFixSGPRCopies. Check that
; the compiler doesn't crash.
define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
define amdgpu_kernel void @insert_split_bb(ptr addrspace(1) %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
; SI-LABEL: insert_split_bb:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s6, s[4:5], 0x4
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s6, 0
; SI-NEXT: s_cbranch_scc0 .LBB30_4
; SI-NEXT: s_cbranch_scc0 .LBB42_4
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_load_dword s7, s[2:3], 0x1
; SI-NEXT: s_mov_b64 s[4:5], 0
; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccnz .LBB30_3
; SI-NEXT: .LBB30_2: ; %if
; SI-NEXT: s_cbranch_vccnz .LBB42_3
; SI-NEXT: .LBB42_2: ; %if
; SI-NEXT: s_load_dword s7, s[2:3], 0x0
; SI-NEXT: .LBB30_3: ; %endif
; SI-NEXT: .LBB42_3: ; %endif
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s6
; SI-NEXT: s_mov_b32 s3, 0x100f000
@ -1435,8 +2011,8 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
; SI-NEXT: v_mov_b32_e32 v1, s7
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB30_4:
; SI-NEXT: s_branch .LBB30_2
; SI-NEXT: .LBB42_4:
; SI-NEXT: s_branch .LBB42_2
;
; VI-LABEL: insert_split_bb:
; VI: ; %bb.0: ; %entry
@ -1444,14 +2020,14 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s6, 0
; VI-NEXT: s_cbranch_scc0 .LBB30_4
; VI-NEXT: s_cbranch_scc0 .LBB42_4
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_load_dword s7, s[2:3], 0x4
; VI-NEXT: s_cbranch_execnz .LBB30_3
; VI-NEXT: .LBB30_2: ; %if
; VI-NEXT: s_cbranch_execnz .LBB42_3
; VI-NEXT: .LBB42_2: ; %if
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s7, s[2:3], 0x0
; VI-NEXT: .LBB30_3: ; %endif
; VI-NEXT: .LBB42_3: ; %endif
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s6
; VI-NEXT: s_mov_b32 s3, 0x1100f000
@ -1459,8 +2035,8 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB30_4:
; VI-NEXT: s_branch .LBB30_2
; VI-NEXT: .LBB42_4:
; VI-NEXT: s_branch .LBB42_2
entry:
%0 = insertelement <2 x i32> undef, i32 %a, i32 0
%1 = icmp eq i32 %a, 0
@ -1483,7 +2059,7 @@ endif:
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, [8 x i32], <2 x double> %a, [8 x i32], i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v2f64(ptr addrspace(1) %out, [8 x i32], <2 x double> %a, [8 x i32], i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v2f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s8, s[4:5], 0x18
@ -1530,7 +2106,7 @@ define amdgpu_kernel void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v2i64(ptr addrspace(1) %out, <2 x i64> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s8, s[4:5], 0x8
@ -1577,7 +2153,7 @@ define amdgpu_kernel void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v3i64(ptr addrspace(1) %out, <3 x i64> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v3i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[4:5], 0x10
@ -1638,7 +2214,7 @@ define amdgpu_kernel void @dynamic_insertelement_v3i64(<3 x i64> addrspace(1)* %
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
define amdgpu_kernel void @dynamic_insertelement_v4f64(ptr addrspace(1) %out, <4 x double> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v4f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[4:5], 0x10
@ -1709,7 +2285,7 @@ define amdgpu_kernel void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)
ret void
}

define amdgpu_kernel void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) #0 {
define amdgpu_kernel void @dynamic_insertelement_v8f64(ptr addrspace(1) %out, <8 x double> %a, i32 %b) #0 {
; SI-LABEL: dynamic_insertelement_v8f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[4:5], 0x20

@ -1,19 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -enable-ipra -print-regusage -o /dev/null 2>&1 < %s | FileCheck %s
; Make sure the expected regmask is generated for sub/superregisters.

; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_hi16 $vgpr0_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}}
; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_hi16 $vgpr0_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}}
define void @csr() #0 {
call void asm sideeffect "", "~{v0},~{v44},~{v45}"() #0
ret void
}

; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
define void @subregs_for_super() #0 {
call void asm sideeffect "", "~{v0},~{v1}"() #0
ret void
}

; CHECK-DAG: clobbered_reg_with_sub Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
; CHECK-DAG: clobbered_reg_with_sub Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
define void @clobbered_reg_with_sub() #0 {
call void asm sideeffect "", "~{v[0:1]}"() #0
ret void
@ -44,3 +45,5 @@ define void @vcc() #0 {
i8* bitcast (void ()* @vcc to i8*)]

attributes #0 = { nounwind }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}

@ -2286,57 +2286,56 @@ define amdgpu_kernel void @v5i64_arg(<5 x i64> addrspace(1)* nocapture %out, <5
;
; VI-LABEL: v5i64_arg:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x64
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x84
; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x84
; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x64
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: s_add_u32 s8, s2, 16
; VI-NEXT: v_mov_b32_e32 v1, s9
; VI-NEXT: s_addc_u32 s9, s3, 0
; VI-NEXT: v_mov_b32_e32 v4, s8
; VI-NEXT: v_mov_b32_e32 v2, s10
; VI-NEXT: v_mov_b32_e32 v3, s11
; VI-NEXT: v_mov_b32_e32 v5, s9
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: v_mov_b32_e32 v5, s3
; VI-NEXT: s_add_u32 s12, s8, 32
; VI-NEXT: v_mov_b32_e32 v1, s10
; VI-NEXT: s_addc_u32 s13, s9, 0
; VI-NEXT: v_mov_b32_e32 v3, s12
; VI-NEXT: v_mov_b32_e32 v2, s11
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v4, s13
; VI-NEXT: s_add_u32 s4, s8, 16
; VI-NEXT: flat_store_dwordx2 v[3:4], v[1:2]
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: s_addc_u32 s5, s9, 0
; VI-NEXT: v_mov_b32_e32 v4, s4
; VI-NEXT: v_mov_b32_e32 v2, s6
; VI-NEXT: v_mov_b32_e32 v3, s7
; VI-NEXT: v_mov_b32_e32 v4, s2
; VI-NEXT: s_add_u32 s2, s2, 32
; VI-NEXT: v_mov_b32_e32 v5, s5
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: s_addc_u32 s3, s3, 0
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: v_mov_b32_e32 v4, s8
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: v_mov_b32_e32 v3, s3
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: v_mov_b32_e32 v5, s9
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: v5i64_arg:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60
; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x60
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: global_store_dwordx2 v4, v[1:2], s[2:3] offset:32
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX9-NEXT: v_mov_b32_e32 v2, s14
; GFX9-NEXT: v_mov_b32_e32 v3, s15
; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16
; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[6:7] offset:32
; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
; GFX9-NEXT: s_endpgm
;
; EG-LABEL: v5i64_arg:
@ -2429,57 +2428,56 @@ define amdgpu_kernel void @v5f64_arg(<5 x double> addrspace(1)* nocapture %out,
;
; VI-LABEL: v5f64_arg:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x64
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x84
; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x84
; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x64
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: s_add_u32 s8, s2, 16
; VI-NEXT: v_mov_b32_e32 v1, s9
; VI-NEXT: s_addc_u32 s9, s3, 0
; VI-NEXT: v_mov_b32_e32 v4, s8
; VI-NEXT: v_mov_b32_e32 v2, s10
; VI-NEXT: v_mov_b32_e32 v3, s11
; VI-NEXT: v_mov_b32_e32 v5, s9
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: v_mov_b32_e32 v5, s3
; VI-NEXT: s_add_u32 s12, s8, 32
; VI-NEXT: v_mov_b32_e32 v1, s10
; VI-NEXT: s_addc_u32 s13, s9, 0
; VI-NEXT: v_mov_b32_e32 v3, s12
; VI-NEXT: v_mov_b32_e32 v2, s11
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v4, s13
; VI-NEXT: s_add_u32 s4, s8, 16
; VI-NEXT: flat_store_dwordx2 v[3:4], v[1:2]
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: s_addc_u32 s5, s9, 0
; VI-NEXT: v_mov_b32_e32 v4, s4
; VI-NEXT: v_mov_b32_e32 v2, s6
; VI-NEXT: v_mov_b32_e32 v3, s7
; VI-NEXT: v_mov_b32_e32 v4, s2
; VI-NEXT: s_add_u32 s2, s2, 32
; VI-NEXT: v_mov_b32_e32 v5, s5
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: s_addc_u32 s3, s3, 0
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: v_mov_b32_e32 v4, s8
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: v_mov_b32_e32 v3, s3
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: v_mov_b32_e32 v5, s9
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: v5f64_arg:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60
; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x60
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: global_store_dwordx2 v4, v[1:2], s[2:3] offset:32
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX9-NEXT: v_mov_b32_e32 v2, s14
; GFX9-NEXT: v_mov_b32_e32 v3, s15
; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16
; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[6:7] offset:32
; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
; GFX9-NEXT: s_endpgm
;
; EG-LABEL: v5f64_arg:

@ -32,9 +32,9 @@ main_body:
}

; GCN-LABEL: {{^}}sample_d_3d:
; GFX1010-NSA: image_sample_d v[0:3], v[7:22],
; GFX1010-NSA: image_sample_d v[0:3], v[7:15],
; GFX1030-NSA: image_sample_d v[0:3], [v3, v8, v7, v5, v4, v6, v0, v2, v1],
; GFX11-NSA: image_sample_d v[0:3], v[7:22],
; GFX11-NSA: image_sample_d v[0:3], v[7:15],
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %r, float %t, float %dsdh, float %dtdv, float %dsdv, float %drdv, float %drdh, float %dtdh) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)

@ -1568,19 +1568,19 @@ main_body:
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_c_d_o_2darray_V1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da
; VERDE-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_o_2darray_V1:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da
; GFX6789-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_d_o_2darray_V1:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
@ -1593,7 +1593,7 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v9, 0
; VERDE-NEXT: v_mov_b32_e32 v10, v9
; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da
; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da
; VERDE-NEXT: s_mov_b32 s15, 0xf000
; VERDE-NEXT: s_mov_b32 s14, -1
; VERDE-NEXT: s_waitcnt vmcnt(0)
@ -1608,7 +1608,7 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x
; GFX6789-NEXT: v_mov_b32_e32 v12, v11
; GFX6789-NEXT: v_mov_b32_e32 v9, v11
; GFX6789-NEXT: v_mov_b32_e32 v10, v12
; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da
; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: v_mov_b32_e32 v0, v9
; GFX6789-NEXT: global_store_dword v11, v10, s[12:13]
@ -1621,7 +1621,7 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x
; GFX10-NEXT: v_mov_b32_e32 v12, v11
; GFX10-NEXT: v_mov_b32_e32 v9, v11
; GFX10-NEXT: v_mov_b32_e32 v10, v12
; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v9
; GFX10-NEXT: global_store_dword v11, v10, s[12:13]
@ -1633,7 +1633,7 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x
; GFX11-NEXT: v_mov_b32_e32 v11, 0
; GFX11-NEXT: v_mov_b32_e32 v12, v11
; GFX11-NEXT: v_dual_mov_b32 v9, v11 :: v_dual_mov_b32 v10, v12
; GFX11-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX11-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v9
; GFX11-NEXT: global_store_b32 v11, v10, s[12:13]
@ -1650,19 +1650,19 @@ main_body:
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_c_d_o_2darray_V2:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da
; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_o_2darray_V2:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da
; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
@ -1676,7 +1676,7 @@ define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc,
; VERDE-NEXT: v_mov_b32_e32 v9, 0
; VERDE-NEXT: v_mov_b32_e32 v10, v9
; VERDE-NEXT: v_mov_b32_e32 v11, v9
; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da
; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: v_mov_b32_e32 v0, v9
; VERDE-NEXT: v_mov_b32_e32 v1, v10
@ -1688,7 +1688,7 @@ define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc,
; GFX6789-NEXT: v_mov_b32_e32 v9, 0
; GFX6789-NEXT: v_mov_b32_e32 v10, v9
; GFX6789-NEXT: v_mov_b32_e32 v11, v9
; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da
; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: v_mov_b32_e32 v0, v9
; GFX6789-NEXT: v_mov_b32_e32 v1, v10
@ -1700,7 +1700,7 @@ define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc,
; GFX10-NEXT: v_mov_b32_e32 v9, 0
; GFX10-NEXT: v_mov_b32_e32 v10, v9
; GFX10-NEXT: v_mov_b32_e32 v11, v9
; GFX10-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX10-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v9
; GFX10-NEXT: v_mov_b32_e32 v1, v10
@ -1712,7 +1712,7 @@ define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc,
; GFX11-NEXT: v_mov_b32_e32 v9, 0
; GFX11-NEXT: v_mov_b32_e32 v10, v9
; GFX11-NEXT: v_mov_b32_e32 v11, v9
; GFX11-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX11-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v2, v11
; GFX11-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10

@ -186,7 +186,7 @@ main_body:
}

; GCN-LABEL: {{^}}sample_c_d_cl_o_2d:
; GCN: image_sample_c_d_cl_o v[0:3], v[0:15], s[0:7], s[8:11] dmask:0xf{{$}}
; GCN: image_sample_c_d_cl_o v[0:3], v[0:8], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_d_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@ -250,7 +250,7 @@ main_body:
}

; GCN-LABEL: {{^}}sample_c_cd_cl_o_2d:
; GCN: image_sample_c_cd_cl_o v[0:3], v[0:15], s[0:7], s[8:11] dmask:0xf{{$}}
; GCN: image_sample_c_cd_cl_o v[0:3], v[0:8], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_cd_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)

@ -20,7 +20,7 @@ declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <3
define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
; GCN-LABEL: image_bvh_intersect_ray:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]
; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[0:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog
main_body:
@ -90,7 +90,7 @@ main_body:
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(<2 x i32> %node_ptr_vec, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
; GCN-LABEL: image_bvh64_intersect_ray:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]
; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog
main_body:
@ -128,7 +128,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 inreg %node_ptr,
; GFX10-NEXT: v_mov_b32_e32 v8, s8
; GFX10-NEXT: s_mov_b32 s15, s13
; GFX10-NEXT: s_mov_b32 s13, s11
; GFX10-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[12:15] a16
; GFX10-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[12:15] a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
@ -182,7 +182,7 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(ptr %p_node_ptr,
; GFX1013-NEXT: v_mov_b32_e32 v4, 2.0
; GFX1013-NEXT: v_mov_b32_e32 v5, 0x40400000
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[4:7]
; GFX1013-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[4:7]
; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1013-NEXT: s_endpgm
@ -208,7 +208,7 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(ptr %p_node_ptr,
; GFX1030-NEXT: v_mov_b32_e32 v2, 0
; GFX1030-NEXT: v_mov_b32_e32 v3, 1.0
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[4:7]
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[0:10], s[4:7]
; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1030-NEXT: s_endpgm
@ -370,7 +370,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4
; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c7
; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1013-NEXT: s_endpgm
@ -396,7 +396,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4
; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c7
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1030-NEXT: s_endpgm
@ -461,7 +461,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c6
; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3] a16
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[0:3] a16
; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1013-NEXT: s_endpgm
@ -484,7 +484,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c6
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3] a16
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[0:3] a16
; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GFX1030-NEXT: s_endpgm

@ -9,7 +9,7 @@
; GCN: s_load_dword s{{[0-9]+}}

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
define amdgpu_kernel void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_load_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
entry:
%ld = load i32, i32 addrspace(4)* %in
store i32 %ld, i32 addrspace(1)* %out
@ -20,7 +20,7 @@ entry:
; GCN: s_load_dwordx2

; EG: VTX_READ_64
define amdgpu_kernel void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_load_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
entry:
%ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
@ -31,7 +31,7 @@ entry:
; GCN: s_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_load_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
entry:
%ld = load <3 x i32>, <3 x i32> addrspace(4)* %in
store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
@ -42,7 +42,7 @@ entry:
; GCN: s_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_load_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
entry:
%ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
@ -54,13 +54,69 @@ entry:

; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_load_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
entry:
%ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
ret void
}

; FUNC-LABEL: {{^}}constant_load_v9i32:
; GCN: s_load_dword
; GCN: s_load_dwordx8

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_32
define amdgpu_kernel void @constant_load_v9i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
entry:
%ld = load <9 x i32>, <9 x i32> addrspace(4)* %in
store <9 x i32> %ld, <9 x i32> addrspace(1)* %out
ret void
}

; FUNC-LABEL: {{^}}constant_load_v10i32:
; GCN: s_load_dwordx2
; GCN: s_load_dwordx8

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v10i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
entry:
%ld = load <10 x i32>, <10 x i32> addrspace(4)* %in
store <10 x i32> %ld, <10 x i32> addrspace(1)* %out
ret void
}

; FUNC-LABEL: {{^}}constant_load_v11i32:
; GCN: s_load_dwordx4
; GCN: s_load_dwordx8

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v11i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
entry:
%ld = load <11 x i32>, <11 x i32> addrspace(4)* %in
store <11 x i32> %ld, <11 x i32> addrspace(1)* %out
ret void
}

; FUNC-LABEL: {{^}}constant_load_v12i32:
; GCN: s_load_dwordx4
; GCN: s_load_dwordx8

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v12i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
entry:
%ld = load <12 x i32>, <12 x i32> addrspace(4)* %in
store <12 x i32> %ld, <12 x i32> addrspace(1)* %out
ret void
}

; FUNC-LABEL: {{^}}constant_load_v16i32:
; GCN: s_load_dwordx16

@ -68,7 +124,7 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_load_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
entry:
%ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
@ -83,7 +139,7 @@ entry:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; EG: CF_END
; EG: VTX_READ_32
define amdgpu_kernel void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_zextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
%ld = load i32, i32 addrspace(4)* %in
%ext = zext i32 %ld to i64
store i64 %ext, i64 addrspace(1)* %out
@ -100,7 +156,7 @@ define amdgpu_kernel void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out,
; EG: VTX_READ_32
; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.
; EG: 31
define amdgpu_kernel void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_sextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
%ld = load i32, i32 addrspace(4)* %in
%ext = sext i32 %ld to i64
store i64 %ext, i64 addrspace(1)* %out
@ -110,7 +166,7 @@ define amdgpu_kernel void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out,
; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64:
; GCN: s_load_dword
; GCN: store_dwordx2
define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
%ld = load <1 x i32>, <1 x i32> addrspace(4)* %in
%ext = zext <1 x i32> %ld to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
@ -121,7 +177,7 @@ define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(
; GCN: s_load_dword s[[LO:[0-9]+]]
; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31
; GCN: store_dwordx2
define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
%ld = load <1 x i32>, <1 x i32> addrspace(4)* %in
%ext = sext <1 x i32> %ld to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
@ -131,7 +187,7 @@ define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(
; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64:
; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: store_dwordx4
define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
%ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
%ext = zext <2 x i32> %ld to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
@ -145,7 +201,7 @@ define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(
; GCN-DAG: s_ashr_i32

; GCN: store_dwordx4
define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
%ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
%ext = sext <2 x i32> %ld to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
@ -157,7 +213,7 @@ define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(

; GCN: store_dwordx4
; GCN: store_dwordx4
define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
%ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
%ext = zext <4 x i32> %ld to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
@ -174,7 +230,7 @@ define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(

; GCN: store_dwordx4
; GCN: store_dwordx4
define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
%ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
%ext = sext <4 x i32> %ld to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
@ -193,7 +249,7 @@ define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out

@ -221,7 +277,7 @@ define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out

@ -242,7 +298,7 @@ define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out

@ -269,7 +325,7 @@ define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspa
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out

@ -321,7 +377,7 @@ define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspa
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(4)* %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out

@ -372,7 +428,7 @@ define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspa
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(4)* %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out

@ -424,7 +480,7 @@ define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspa
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @constant_load_v32i32(<32 x i32> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
define amdgpu_kernel void @constant_load_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(4)* %in
  store <32 x i32> %ld, <32 x i32> addrspace(1)* %out
  ret void

@ -10,7 +10,7 @@
; GCN-HSA: flat_load_dword

; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
define amdgpu_kernel void @global_load_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %tmp0 = load float, float addrspace(1)* %in
  store float %tmp0, float addrspace(1)* %out

@ -22,7 +22,7 @@ entry:
; GCN-HSA: flat_load_dwordx2

; R600: VTX_READ_64
define amdgpu_kernel void @global_load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %tmp0 = load <2 x float>, <2 x float> addrspace(1)* %in
  store <2 x float> %tmp0, <2 x float> addrspace(1)* %out

@ -35,7 +35,7 @@ entry:
; GCNX3-HSA: flat_load_dwordx3

; R600: VTX_READ_128
define amdgpu_kernel void @global_load_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %tmp0 = load <3 x float>, <3 x float> addrspace(1)* %in
  store <3 x float> %tmp0, <3 x float> addrspace(1)* %out

@ -47,7 +47,7 @@ entry:
; GCN-HSA: flat_load_dwordx4

; R600: VTX_READ_128
define amdgpu_kernel void @global_load_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %tmp0 = load <4 x float>, <4 x float> addrspace(1)* %in
  store <4 x float> %tmp0, <4 x float> addrspace(1)* %out

@ -62,13 +62,89 @@ entry:

; R600: VTX_READ_128
; R600: VTX_READ_128
define amdgpu_kernel void @global_load_v8f32(<8 x float> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_v8f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %tmp0 = load <8 x float>, <8 x float> addrspace(1)* %in
  store <8 x float> %tmp0, <8 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v9f32:
; GCN-NOHSA: buffer_load_dword
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dword
; GCN-HSA: flat_load_dwordx4

; R600: VTX_READ_128
; R600: VTX_READ_32
; R600: VTX_READ_128
define amdgpu_kernel void @global_load_v9f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %tmp0 = load <9 x float>, <9 x float> addrspace(1)* %in
  store <9 x float> %tmp0, <9 x float> addrspace(1)* %out
  ret void
}


; FUNC-LABEL: {{^}}global_load_v10f32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx2

; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
define amdgpu_kernel void @global_load_v10f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %tmp0 = load <10 x float>, <10 x float> addrspace(1)* %in
  store <10 x float> %tmp0, <10 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v11f32:
; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; GCNX3-NOHSA: buffer_load_dwordx4
; GCNX3-NOHSA: buffer_load_dwordx4
; GCNX3-NOHSA: buffer_load_dwordx3
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx3

; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
define amdgpu_kernel void @global_load_v11f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %tmp0 = load <11 x float>, <11 x float> addrspace(1)* %in
  store <11 x float> %tmp0, <11 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v12f32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
define amdgpu_kernel void @global_load_v12f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %tmp0 = load <12 x float>, <12 x float> addrspace(1)* %in
  store <12 x float> %tmp0, <12 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v16f32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

@ -84,7 +160,7 @@ entry:
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
define amdgpu_kernel void @global_load_v16f32(<16 x float> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_v16f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %tmp0 = load <16 x float>, <16 x float> addrspace(1)* %in
  store <16 x float> %tmp0, <16 x float> addrspace(1)* %out

@ -10,7 +10,7 @@
; GCN-HSA: {{flat|global}}_load_dword

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
define amdgpu_kernel void @global_load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load i32, i32 addrspace(1)* %in
  store i32 %ld, i32 addrspace(1)* %out

@ -22,7 +22,7 @@ entry:
; GCN-HSA: {{flat|global}}_load_dwordx2

; EG: VTX_READ_64
define amdgpu_kernel void @global_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  store <2 x i32> %ld, <2 x i32> addrspace(1)* %out

@ -35,7 +35,7 @@ entry:
; GCNX3-HSA: {{flat|global}}_load_dwordx3

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in
  store <3 x i32> %ld, <3 x i32> addrspace(1)* %out

@ -47,7 +47,7 @@ entry:
; GCN-HSA: {{flat|global}}_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  store <4 x i32> %ld, <4 x i32> addrspace(1)* %out

@ -62,13 +62,73 @@ entry:

; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v9i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dword
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dword
define amdgpu_kernel void @global_load_v9i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <9 x i32>, <9 x i32> addrspace(1)* %in
  store <9 x i32> %ld, <9 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v10i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx2
define amdgpu_kernel void @global_load_v10i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <10 x i32>, <10 x i32> addrspace(1)* %in
  store <10 x i32> %ld, <10 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v11i32:
; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; GCNX3-NOHSA: buffer_load_dwordx4
; GCNX3-NOHSA: buffer_load_dwordx4
; GCNX3-NOHSA: buffer_load_dwordx3
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx3
define amdgpu_kernel void @global_load_v11i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <11 x i32>, <11 x i32> addrspace(1)* %in
  store <11 x i32> %ld, <11 x i32> addrspace(1)* %out
  ret void
}


; FUNC-LABEL: {{^}}global_load_v12i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
define amdgpu_kernel void @global_load_v12i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <12 x i32>, <12 x i32> addrspace(1)* %in
  store <12 x i32> %ld, <12 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v16i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

@ -84,7 +144,7 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  store <16 x i32> %ld, <16 x i32> addrspace(1)* %out

@ -100,7 +160,7 @@ entry:
; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]

; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
define amdgpu_kernel void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_zextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load i32, i32 addrspace(1)* %in
  %ext = zext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out

@ -119,7 +179,7 @@ define amdgpu_kernel void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i3
; EG: VTX_READ_32
; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.
; EG: 31
define amdgpu_kernel void @global_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_sextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load i32, i32 addrspace(1)* %in
  %ext = sext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out

@ -132,7 +192,7 @@ define amdgpu_kernel void @global_sextload_i32_to_i64(i64 addrspace(1)* %out, i3

; GCN-HSA: {{flat|global}}_load_dword
; GCN-HSA: {{flat|global}}_store_dwordx2
define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = zext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out

@ -145,7 +205,7 @@ define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = sext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out

@ -158,7 +218,7 @@ define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)

; GCN-HSA: {{flat|global}}_load_dwordx2
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = zext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out

@ -174,7 +234,7 @@ define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = sext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out

@ -189,7 +249,7 @@ define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = zext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out

@ -210,7 +270,7 @@ define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = sext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out

@ -233,7 +293,7 @@ define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out

@ -265,7 +325,7 @@ define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out

@ -311,7 +371,7 @@ define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out

@ -346,7 +406,7 @@ define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(<16 x i64> addrspace
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out

@ -446,7 +506,7 @@ define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out

@ -513,7 +573,7 @@ define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out

@ -581,7 +641,7 @@ define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(<32 x i64> addrspace
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_load_v32i32(<32 x i32> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
define amdgpu_kernel void @global_load_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  store <32 x i32> %ld, <32 x i32> addrspace(1)* %out
  ret void

@ -714,7 +714,7 @@ body: |

# GFX11-LABEL: name: image_sample_c_d_cl_o_merged_v1v3
# GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V16_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
# GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V9_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3

@ -726,9 +726,9 @@ body: |
    %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
    %4:vgpr_32 = COPY %2.sub3
    %5:vreg_512 = IMPLICIT_DEF
    %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V16_gfx11 %5:vreg_512, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
    %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V16_gfx11 %5:vreg_512, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
    %5:vreg_288 = IMPLICIT_DEF
    %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V9_gfx11 %5:vreg_288, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
    %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V9_gfx11 %5:vreg_288, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---

@ -5,14 +5,19 @@
define amdgpu_kernel void @select_f16(
; SI-LABEL: select_f16:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
; SI-NEXT: s_mov_b32 s26, -1
; SI-NEXT: s_mov_b32 s27, 0xe8f000
; SI-NEXT: s_add_u32 s24, s24, s3
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x11
; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s16, s6
; SI-NEXT: s_mov_b32 s17, s7
; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: s_mov_b32 s19, s3
; SI-NEXT: s_mov_b32 s20, s8
; SI-NEXT: s_mov_b32 s21, s9

@ -34,6 +39,7 @@ define amdgpu_kernel void @select_f16(
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: s_addc_u32 s25, s25, 0
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2

@ -46,14 +52,19 @@ define amdgpu_kernel void @select_f16(
;
; VI-LABEL: select_f16:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
; VI-NEXT: s_mov_b32 s26, -1
; VI-NEXT: s_mov_b32 s27, 0xe80000
; VI-NEXT: s_add_u32 s24, s24, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x44
; VI-NEXT: s_mov_b32 s18, s2
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s16, s6
; VI-NEXT: s_mov_b32 s17, s7
; VI-NEXT: s_mov_b32 s18, s2
; VI-NEXT: s_mov_b32 s19, s3
; VI-NEXT: s_mov_b32 s20, s8
; VI-NEXT: s_mov_b32 s21, s9

@ -75,6 +86,7 @@ define amdgpu_kernel void @select_f16(
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_mov_b32 s0, s4
; VI-NEXT: s_mov_b32 s1, s5
; VI-NEXT: s_addc_u32 s25, s25, 0
; VI-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0

@ -420,14 +432,19 @@ entry:
define amdgpu_kernel void @select_v2f16(
; SI-LABEL: select_v2f16:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
; SI-NEXT: s_mov_b32 s26, -1
; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x11
; SI-NEXT: s_mov_b32 s27, 0xe8f000
; SI-NEXT: s_add_u32 s24, s24, s3
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s16, s6
; SI-NEXT: s_mov_b32 s17, s7
; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: s_mov_b32 s19, s3
; SI-NEXT: s_mov_b32 s20, s8
; SI-NEXT: s_mov_b32 s21, s9

@ -445,6 +462,7 @@ define amdgpu_kernel void @select_v2f16(
; SI-NEXT: buffer_load_dword v3, off, s[8:11], 0
; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: s_addc_u32 s25, s25, 0
; SI-NEXT: s_waitcnt vmcnt(3)
; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; SI-NEXT: v_cvt_f32_f16_e32 v5, v5

@ -474,8 +492,13 @@ define amdgpu_kernel void @select_v2f16(
;
; VI-LABEL: select_v2f16:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x44
; VI-NEXT: s_mov_b32 s26, -1
; VI-NEXT: s_mov_b32 s27, 0xe80000
; VI-NEXT: s_add_u32 s24, s24, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s14, s2

@ -499,6 +522,7 @@ define amdgpu_kernel void @select_v2f16(
; VI-NEXT: buffer_load_dword v3, off, s[8:11], 0
; VI-NEXT: s_mov_b32 s0, s4
; VI-NEXT: s_mov_b32 s1, s5
; VI-NEXT: s_addc_u32 s25, s25, 0
; VI-NEXT: s_waitcnt vmcnt(3)
; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; VI-NEXT: s_waitcnt vmcnt(2)

@ -67,14 +67,19 @@ entry:
define amdgpu_kernel void @madak_f16_use_2(
; SI-LABEL: madak_f16_use_2:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; SI-NEXT: s_mov_b32 s22, -1
; SI-NEXT: s_mov_b32 s23, 0xe8f000
; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x11
; SI-NEXT: s_add_u32 s20, s20, s3
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s16, s8
; SI-NEXT: s_mov_b32 s17, s9
; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: s_mov_b32 s19, s3
; SI-NEXT: s_mov_b32 s8, s10
; SI-NEXT: s_mov_b32 s9, s11

@ -91,6 +96,7 @@ define amdgpu_kernel void @madak_f16_use_2(
; SI-NEXT: v_mov_b32_e32 v3, 0x41200000
; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: s_addc_u32 s21, s21, 0
; SI-NEXT: s_mov_b32 s8, s6
; SI-NEXT: s_mov_b32 s9, s7
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0

@ -106,14 +112,19 @@ define amdgpu_kernel void @madak_f16_use_2(
;
; VI-LABEL: madak_f16_use_2:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; VI-NEXT: s_mov_b32 s22, -1
; VI-NEXT: s_mov_b32 s23, 0xe80000
; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x44
; VI-NEXT: s_add_u32 s20, s20, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s18, s2
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s16, s8
; VI-NEXT: s_mov_b32 s17, s9
; VI-NEXT: s_mov_b32 s18, s2
; VI-NEXT: s_mov_b32 s19, s3
; VI-NEXT: s_mov_b32 s8, s10
; VI-NEXT: s_mov_b32 s9, s11

@ -130,6 +141,7 @@ define amdgpu_kernel void @madak_f16_use_2(
; VI-NEXT: v_mov_b32_e32 v3, 0x4900
; VI-NEXT: s_mov_b32 s0, s4
; VI-NEXT: s_mov_b32 s1, s5
; VI-NEXT: s_addc_u32 s21, s21, 0
; VI-NEXT: s_mov_b32 s8, s6
; VI-NEXT: s_mov_b32 s9, s7
; VI-NEXT: v_madak_f16 v1, v0, v1, 0x4900

@ -8,11 +8,11 @@ body: |
    ; GCN-LABEL: name: waitcnt-check-inorder
    ; GCN: S_WAITCNT 0
    ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    ; GCN-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    S_ENDPGM 0
...
---

@ -22,11 +22,11 @@ body: |
    ; GCN-LABEL: name: waitcnt-check-vs-vmem
    ; GCN: S_WAITCNT 0
    ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    ; GCN-NEXT: S_WAITCNT 16240
    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    S_ENDPGM 0
...

@ -37,11 +37,11 @@ body: |
    ; GCN-LABEL: name: waitcnt-check-vs-mimg-samp
    ; GCN: S_WAITCNT 0
    ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    ; GCN-NEXT: S_WAITCNT 16240
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
    ; GCN-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
    S_ENDPGM 0
...

@ -54,10 +54,10 @@ body: |
    ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    ; GCN-NEXT: S_WAITCNT 16240
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    ; GCN-NEXT: S_ENDPGM 0
    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    S_ENDPGM 0
...
---

@ -69,9 +69,9 @@ body: |
    ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
    ; GCN-NEXT: S_WAITCNT 16240
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    ; GCN-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
    S_ENDPGM 0
...

@ -1,28 +1,28 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1013 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck %s

image_bvh64_intersect_ray v[5:8], v[1:16], s[8:11]
image_bvh64_intersect_ray v[5:8], v[1:12], s[8:11]
// CHECK: [0x01,0x9f,0x9c,0xf1,0x01,0x05,0x02,0x00]

image_bvh64_intersect_ray v[5:8], v[240:255], s[8:11] a16
// CHECK: [0x01,0x9f,0x9c,0xf1,0xf0,0x05,0x02,0x40]
image_bvh64_intersect_ray v[5:8], v[247:255], s[8:11] a16
// CHECK: [0x01,0x9f,0x9c,0xf1,0xf7,0x05,0x02,0x40]

image_bvh64_intersect_ray v[5:8], v[1:16], ttmp[12:15]
image_bvh64_intersect_ray v[5:8], v[1:12], ttmp[12:15]
// CHECK: [0x01,0x9f,0x9c,0xf1,0x01,0x05,0x1e,0x00]

image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40, v42], s[12:15]
// CHECK: encoding: [0x07,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x2a,0x00]
// CHECK: [0x07,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x2a,0x00]

image_bvh_intersect_ray v[252:255], v[1:16], s[8:11]
image_bvh_intersect_ray v[252:255], v[1:11], s[8:11]
// CHECK: [0x01,0x9f,0x98,0xf1,0x01,0xfc,0x02,0x00]

image_bvh_intersect_ray v[5:8], v[248:255], s[8:11] a16
// CHECK: [0x01,0x9f,0x98,0xf1,0xf8,0x05,0x02,0x40]

image_bvh_intersect_ray v[5:8], v[1:16], ttmp[12:15]
image_bvh_intersect_ray v[5:8], v[1:11], ttmp[12:15]
// CHECK: [0x01,0x9f,0x98,0xf1,0x01,0x05,0x1e,0x00]

image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15]
// CHECK: encoding: [0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00]
// CHECK: [0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00]

image_msaa_load v[5:6], v[1:4], s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
// CHECK: [0x39,0x03,0x00,0xf0,0x01,0x05,0x02,0x00]

@ -84,16 +84,16 @@ v_fmac_legacy_f32 v0, |v1|, -v2
v_fmac_legacy_f32 v0, s1, 2.0
// GFX10: encoding: [0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00]

image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
image_bvh_intersect_ray v[4:7], v[9:19], s[4:7]
// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00]

image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40]

image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7]
image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7]
// GFX10: encoding: [0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x00]

image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16
image_bvh64_intersect_ray v[4:7], v[9:17], s[4:7] a16
// GFX10: encoding: [0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x40]

image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15]

@ -298,8 +298,8 @@ image_sample_d v[64:66], v[32:37], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG
image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x14,0x15]

image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
; GFX10: image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x07,0x88,0xf0,0x20,0x40,0x21,0x03]
image_sample_d v[64:66], v[32:40], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
; GFX10: image_sample_d v[64:66], v[32:40], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x07,0x88,0xf0,0x20,0x40,0x21,0x03]

image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE
; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x05,0x00,0x00]

@ -761,10 +761,10 @@ global_store_d16_hi_b8 v1, v2, s[104:105]
global_store_dword_addtid v1, off offset:16 glc slc dlc
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

image_bvh64_intersect_ray v[252:255], v[240:255], ttmp[12:15] a16
image_bvh64_intersect_ray v[252:255], v[247:255], ttmp[12:15] a16
// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

image_bvh_intersect_ray v[252:255], v[1:16], s[8:11]
image_bvh_intersect_ray v[252:255], v[1:11], s[8:11]
// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

image_msaa_load v14, [v204,v11,v14,v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY

@ -1248,23 +1248,23 @@ image_atomic_xor v[1:2], v[2:3], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA uno
|
|||
image_atomic_xor v[254:255], v[254:255], ttmp[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc dlc a16 lwe
|
||||
// GFX11: [0x98,0x73,0x51,0xf0,0xfe,0xfe,0x5d,0x00]
|
||||
|
||||
image_bvh64_intersect_ray v[5:8], v[1:16], s[8:11]
|
||||
image_bvh64_intersect_ray v[5:8], v[1:12], s[8:11]
|
||||
// GFX11: [0x80,0x8f,0x68,0xf0,0x01,0x05,0x02,0x00]
|
||||
|
||||
image_bvh64_intersect_ray v[5:8], v[240:255], s[8:11]
|
||||
// GFX11: [0x80,0x8f,0x68,0xf0,0xf0,0x05,0x02,0x00]
|
||||
image_bvh64_intersect_ray v[5:8], v[244:255], s[8:11]
|
||||
// GFX11: [0x80,0x8f,0x68,0xf0,0xf4,0x05,0x02,0x00]
|
||||
|
||||
image_bvh64_intersect_ray v[5:8], v[1:16], s[100:103] a16
|
||||
image_bvh64_intersect_ray v[5:8], v[1:9], s[100:103] a16
|
||||
// GFX11: [0x80,0x8f,0x69,0xf0,0x01,0x05,0x19,0x00]
|
||||
|
||||
image_bvh64_intersect_ray v[252:255], v[240:255], ttmp[12:15] a16
|
||||
// GFX11: [0x80,0x8f,0x69,0xf0,0xf0,0xfc,0x1e,0x00]
|
||||
image_bvh64_intersect_ray v[252:255], v[247:255], ttmp[12:15] a16
|
||||
// GFX11: [0x80,0x8f,0x69,0xf0,0xf7,0xfc,0x1e,0x00]
|
||||
|
||||
image_bvh_intersect_ray v[5:8], v[1:16], s[8:11]
|
||||
image_bvh_intersect_ray v[5:8], v[1:11], s[8:11]
|
||||
// GFX11: [0x80,0x8f,0x64,0xf0,0x01,0x05,0x02,0x00]
|
||||
|
||||
image_bvh_intersect_ray v[5:8], v[240:255], s[8:11]
|
||||
// GFX11: [0x80,0x8f,0x64,0xf0,0xf0,0x05,0x02,0x00]
|
||||
image_bvh_intersect_ray v[5:8], v[245:255], s[8:11]
|
||||
// GFX11: [0x80,0x8f,0x64,0xf0,0xf5,0x05,0x02,0x00]
|
||||
|
||||
image_bvh_intersect_ray v[5:8], v[1:8], s[100:103] a16
|
||||
// GFX11: [0x80,0x8f,0x65,0xf0,0x01,0x05,0x19,0x00]
|
||||
|
@ -3264,17 +3264,17 @@ image_sample_c_d v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D
|
|||
image_sample_c_d v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
|
||||
// GFX11: [0x00,0x03,0x85,0xf0,0xfc,0x05,0x02,0x0c]
|
||||
|
||||
image_sample_c_d v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
|
||||
image_sample_c_d v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
|
||||
// GFX11: [0x08,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]
|
||||
|
||||
image_sample_c_d v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
|
||||
image_sample_c_d v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
|
||||
// GFX11: [0x08,0x03,0x84,0xf0,0xf0,0x05,0x02,0x0c]
|
||||
|
||||
image_sample_c_d v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
|
||||
image_sample_c_d v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
|
||||
// GFX11: [0x08,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c]
|
||||
|
||||
image_sample_c_d v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
|
||||
// GFX11: [0x08,0x03,0x85,0xf0,0xf0,0x05,0x02,0x0c]
|
||||
image_sample_c_d v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
|
||||
// GFX11: [0x08,0x03,0x85,0xf0,0xf1,0x05,0x02,0x0c]
|
||||
|
||||
image_sample_c_d v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
|
||||
// GFX11: [0x04,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]
|
||||
|
@ -3336,17 +3336,17 @@ image_sample_c_d_cl v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_
|
|||
image_sample_c_d_cl v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
|
||||
// GFX11: [0x00,0x03,0x11,0xf1,0xfc,0x05,0x02,0x0c]
|
||||
|
||||
image_sample_c_d_cl v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
|
||||
image_sample_c_d_cl v[5:6], v[1:11], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
|
||||
// GFX11: [0x08,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]
|
||||
|
||||
image_sample_c_d_cl v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
|
||||
// GFX11: [0x08,0x03,0x10,0xf1,0xf0,0x05,0x02,0x0c]
|
||||
image_sample_c_d_cl v[5:6], v[241:251], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
|
||||
// GFX11: [0x08,0x03,0x10,0xf1,0xf1,0x05,0x02,0x0c]
|
||||
|
||||
image_sample_c_d_cl v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
|
||||
image_sample_c_d_cl v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
|
||||
// GFX11: [0x08,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c]
|
||||
|
||||
image_sample_c_d_cl v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
|
||||
// GFX11: [0x08,0x03,0x11,0xf1,0xf0,0x05,0x02,0x0c]
|
||||
image_sample_c_d_cl v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
|
||||
// GFX11: [0x08,0x03,0x11,0xf1,0xf1,0x05,0x02,0x0c]
|
||||
|
||||
image_sample_c_d_cl v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
|
||||
// GFX11: [0x04,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]
|
||||
|
@ -3360,11 +3360,11 @@ image_sample_c_d_cl v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_
|
|||
image_sample_c_d_cl v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
|
||||
// GFX11: [0x04,0x03,0x11,0xf1,0xf9,0xfd,0x22,0x0c]
|
||||
|
||||
image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
|
||||
image_sample_c_d_cl v5, v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
|
||||
// GFX11: [0x0c,0x03,0x12,0xf1,0x01,0x05,0x02,0x0c]
|
||||
|
||||
image_sample_c_d_cl v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
|
||||
// GFX11: [0x0c,0x03,0x12,0xf1,0xf0,0xff,0x02,0x0c]
|
||||
image_sample_c_d_cl v255, v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
|
||||
// GFX11: [0x0c,0x03,0x12,0xf1,0xf1,0xff,0x02,0x0c]
|
||||
|
||||
image_sample_c_d_cl v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16
|
||||
// GFX11: [0x0c,0x03,0x13,0xf1,0x01,0x05,0x22,0x0c]
|
||||
|
@@ -3384,11 +3384,11 @@ image_sample_c_d_cl v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_
image_sample_c_d_cl v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe
// GFX11: [0x10,0x04,0x11,0xf1,0xfb,0xfe,0x22,0x0c]

image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
image_sample_c_d_cl v5, v[1:9], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
// GFX11: [0x14,0x04,0x12,0xf1,0x01,0x05,0x02,0x0c]

image_sample_c_d_cl v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
// GFX11: [0x14,0x04,0x12,0xf1,0xf0,0xff,0x02,0x0c]
image_sample_c_d_cl v255, v[241:249], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
// GFX11: [0x14,0x04,0x12,0xf1,0xf1,0xff,0x02,0x0c]

image_sample_c_d_cl v[5:6], v[1:7], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16
// GFX11: [0x14,0x04,0x13,0xf1,0x01,0x05,0x38,0x64]

@@ -3408,11 +3408,11 @@ image_sample_c_d_cl_g16 v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_
image_sample_c_d_cl_g16 v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
// GFX11: [0x00,0x03,0x51,0xf1,0xfc,0x05,0x02,0x0c]

image_sample_c_d_cl_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_c_d_cl_g16 v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c]

image_sample_c_d_cl_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x50,0xf1,0xf0,0x05,0x02,0x0c]
image_sample_c_d_cl_g16 v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x50,0xf1,0xf1,0x05,0x02,0x0c]

image_sample_c_d_cl_g16 v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c]

@@ -3480,23 +3480,23 @@ image_sample_c_d_cl_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IM
image_sample_c_d_cl_o v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
// GFX11: [0x00,0x03,0x29,0xf1,0xfb,0x05,0x02,0x0c]

image_sample_c_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_c_d_cl_o v[5:6], v[1:12], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]

image_sample_c_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_c_d_cl_o v[5:6], v[240:251], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x28,0xf1,0xf0,0x05,0x02,0x0c]

image_sample_c_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
image_sample_c_d_cl_o v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c]

image_sample_c_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
image_sample_c_d_cl_o v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0x29,0xf1,0xf0,0x05,0x02,0x0c]

image_sample_c_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
image_sample_c_d_cl_o v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
// GFX11: [0x04,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]

image_sample_c_d_cl_o v[254:255], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
// GFX11: [0x04,0x03,0x28,0xf1,0xf0,0xfe,0x02,0x0c]
image_sample_c_d_cl_o v[254:255], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
// GFX11: [0x04,0x03,0x28,0xf1,0xf1,0xfe,0x02,0x0c]

image_sample_c_d_cl_o v[5:7], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
// GFX11: [0x04,0x03,0x29,0xf1,0x01,0x05,0x22,0x0c]

@@ -3504,10 +3504,10 @@ image_sample_c_d_cl_o v[5:7], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IM
image_sample_c_d_cl_o v[253:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
// GFX11: [0x04,0x03,0x29,0xf1,0xf8,0xfd,0x22,0x0c]

image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
// GFX11: [0x0c,0x03,0x2a,0xf1,0x01,0x05,0x02,0x0c]

image_sample_c_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
image_sample_c_d_cl_o v255, v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
// GFX11: [0x0c,0x03,0x2a,0xf1,0xf0,0xff,0x02,0x0c]

image_sample_c_d_cl_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16

@@ -3528,10 +3528,10 @@ image_sample_c_d_cl_o v[5:6], v[1:6], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IM
image_sample_c_d_cl_o v[254:255], v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe
// GFX11: [0x10,0x04,0x29,0xf1,0xfa,0xfe,0x22,0x0c]

image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
// GFX11: [0x14,0x04,0x2a,0xf1,0x01,0x05,0x02,0x0c]

image_sample_c_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
image_sample_c_d_cl_o v255, v[240:249], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
// GFX11: [0x14,0x04,0x2a,0xf1,0xf0,0xff,0x02,0x0c]

image_sample_c_d_cl_o v[5:6], v[1:8], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16

@@ -3552,10 +3552,10 @@ image_sample_c_d_cl_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSR
image_sample_c_d_cl_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
// GFX11: [0x00,0x03,0x59,0xf1,0xfb,0x05,0x02,0x0c]

image_sample_c_d_cl_o_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_c_d_cl_o_g16 v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c]

image_sample_c_d_cl_o_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_c_d_cl_o_g16 v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x58,0xf1,0xf0,0x05,0x02,0x0c]

image_sample_c_d_cl_o_g16 v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16

@@ -3696,16 +3696,16 @@ image_sample_c_d_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1
image_sample_c_d_o v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
// GFX11: [0x00,0x03,0xad,0xf0,0xfb,0x05,0x02,0x0c]

image_sample_c_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_c_d_o v[5:6], v[1:11], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c]

image_sample_c_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0xac,0xf0,0xf0,0x05,0x02,0x0c]
image_sample_c_d_o v[5:6], v[241:251], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0xac,0xf0,0xf1,0x05,0x02,0x0c]

image_sample_c_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
image_sample_c_d_o v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c]

image_sample_c_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
image_sample_c_d_o v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0xad,0xf0,0xf0,0x05,0x02,0x0c]

image_sample_c_d_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D

@@ -3720,11 +3720,11 @@ image_sample_c_d_o v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2
image_sample_c_d_o v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
// GFX11: [0x04,0x03,0xad,0xf0,0xf9,0xfd,0x22,0x0c]

image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
image_sample_c_d_o v5, v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
// GFX11: [0x0c,0x03,0xae,0xf0,0x01,0x05,0x02,0x0c]

image_sample_c_d_o v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
// GFX11: [0x0c,0x03,0xae,0xf0,0xf0,0xff,0x02,0x0c]
image_sample_c_d_o v255, v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
// GFX11: [0x0c,0x03,0xae,0xf0,0xf1,0xff,0x02,0x0c]

image_sample_c_d_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16
// GFX11: [0x0c,0x03,0xaf,0xf0,0x01,0x05,0x22,0x0c]

@@ -3744,11 +3744,11 @@ image_sample_c_d_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1
image_sample_c_d_o v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe
// GFX11: [0x10,0x04,0xad,0xf0,0xfb,0xfe,0x22,0x0c]

image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
image_sample_c_d_o v5, v[1:9], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
// GFX11: [0x14,0x04,0xae,0xf0,0x01,0x05,0x02,0x0c]

image_sample_c_d_o v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
// GFX11: [0x14,0x04,0xae,0xf0,0xf0,0xff,0x02,0x0c]
image_sample_c_d_o v255, v[241:249], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
// GFX11: [0x14,0x04,0xae,0xf0,0xf1,0xff,0x02,0x0c]

image_sample_c_d_o v[5:6], v[1:8], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16
// GFX11: [0x14,0x04,0xaf,0xf0,0x01,0x05,0x38,0x64]

@@ -3768,11 +3768,11 @@ image_sample_c_d_o_g16 v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_I
image_sample_c_d_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
// GFX11: [0x00,0x03,0xf1,0xf0,0xfb,0x05,0x02,0x0c]

image_sample_c_d_o_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_c_d_o_g16 v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c]

image_sample_c_d_o_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0xf0,0xf0,0xf0,0x05,0x02,0x0c]
image_sample_c_d_o_g16 v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0xf0,0xf0,0xf1,0x05,0x02,0x0c]

image_sample_c_d_o_g16 v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c]

@@ -4344,11 +4344,11 @@ image_sample_d v[5:6], v[1:3], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a1
image_sample_d v[5:6], v[253:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
// GFX11: [0x00,0x03,0x71,0xf0,0xfd,0x05,0x02,0x0c]

image_sample_d v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_d v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x70,0xf0,0x01,0x05,0x02,0x0c]

image_sample_d v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x70,0xf0,0xf0,0x05,0x02,0x0c]
image_sample_d v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x70,0xf0,0xf1,0x05,0x02,0x0c]

image_sample_d v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c]

@@ -4416,10 +4416,10 @@ image_sample_d_cl v[5:6], v[1:3], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D
image_sample_d_cl v[5:6], v[253:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
// GFX11: [0x00,0x03,0x05,0xf1,0xfd,0x05,0x02,0x0c]

image_sample_d_cl v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_d_cl v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x04,0xf1,0x01,0x05,0x02,0x0c]

image_sample_d_cl v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_d_cl v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x04,0xf1,0xf0,0x05,0x02,0x0c]

image_sample_d_cl v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16

@@ -4560,17 +4560,17 @@ image_sample_d_cl_o v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_
image_sample_d_cl_o v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
// GFX11: [0x00,0x03,0x1d,0xf1,0xfc,0x05,0x02,0x0c]

image_sample_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_d_cl_o v[5:6], v[1:11], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]

image_sample_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x1c,0xf1,0xf0,0x05,0x02,0x0c]
image_sample_d_cl_o v[5:6], v[241:251], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x1c,0xf1,0xf1,0x05,0x02,0x0c]

image_sample_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
image_sample_d_cl_o v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c]

image_sample_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0x1d,0xf1,0xf0,0x05,0x02,0x0c]
image_sample_d_cl_o v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0x1d,0xf1,0xf1,0x05,0x02,0x0c]

image_sample_d_cl_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
// GFX11: [0x04,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]

@@ -4584,11 +4584,11 @@ image_sample_d_cl_o v[5:7], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_
image_sample_d_cl_o v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe
// GFX11: [0x04,0x03,0x1d,0xf1,0xf9,0xfd,0x22,0x0c]

image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
image_sample_d_cl_o v5, v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
// GFX11: [0x0c,0x03,0x1e,0xf1,0x01,0x05,0x02,0x0c]

image_sample_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
// GFX11: [0x0c,0x03,0x1e,0xf1,0xf0,0xff,0x02,0x0c]
image_sample_d_cl_o v255, v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16
// GFX11: [0x0c,0x03,0x1e,0xf1,0xf1,0xff,0x02,0x0c]

image_sample_d_cl_o v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16
// GFX11: [0x0c,0x03,0x1f,0xf1,0x01,0x05,0x22,0x0c]

@@ -4608,11 +4608,11 @@ image_sample_d_cl_o v[5:6], v[1:5], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_
image_sample_d_cl_o v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe
// GFX11: [0x10,0x04,0x1d,0xf1,0xfb,0xfe,0x22,0x0c]

image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
image_sample_d_cl_o v5, v[1:9], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
// GFX11: [0x14,0x04,0x1e,0xf1,0x01,0x05,0x02,0x0c]

image_sample_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
// GFX11: [0x14,0x04,0x1e,0xf1,0xf0,0xff,0x02,0x0c]
image_sample_d_cl_o v255, v[241:249], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16
// GFX11: [0x14,0x04,0x1e,0xf1,0xf1,0xff,0x02,0x0c]

image_sample_d_cl_o v[5:6], v[1:7], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16
// GFX11: [0x14,0x04,0x1f,0xf1,0x01,0x05,0x38,0x64]

@@ -4632,11 +4632,11 @@ image_sample_d_cl_o_g16 v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_
image_sample_d_cl_o_g16 v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
// GFX11: [0x00,0x03,0x55,0xf1,0xfc,0x05,0x02,0x0c]

image_sample_d_cl_o_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_d_cl_o_g16 v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c]

image_sample_d_cl_o_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x54,0xf1,0xf0,0x05,0x02,0x0c]
image_sample_d_cl_o_g16 v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x54,0xf1,0xf1,0x05,0x02,0x0c]

image_sample_d_cl_o_g16 v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c]

@@ -4776,17 +4776,17 @@ image_sample_d_o v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D
image_sample_d_o v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
// GFX11: [0x00,0x03,0x99,0xf0,0xfc,0x05,0x02,0x0c]

image_sample_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_d_o v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]

image_sample_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
image_sample_d_o v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D
// GFX11: [0x08,0x03,0x98,0xf0,0xf0,0x05,0x02,0x0c]

image_sample_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
image_sample_d_o v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c]

image_sample_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0x99,0xf0,0xf0,0x05,0x02,0x0c]
image_sample_d_o v[5:6], v[241:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
// GFX11: [0x08,0x03,0x99,0xf0,0xf1,0x05,0x02,0x0c]

image_sample_d_o v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D
// GFX11: [0x04,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]

@@ -163,8 +163,8 @@ image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RS
image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D
// GFX11: image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]

image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
// GFX11: image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]
image_sample_d v[64:66], v[32:40], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
// GFX11: image_sample_d v[64:66], v[32:40], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]

image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY
// GFX11: image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00]

@@ -286,17 +286,17 @@ image_msaa_load v[1:2], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
// GFX11: image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1d,0x01,0x60,0xf0,0xcc,0x0a,0x0a,0x00,0x0b,0x0e,0x13,0x00]

image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
// GFX11: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00]
image_bvh_intersect_ray v[4:7], v[9:19], s[4:7]
// GFX11: image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00]

image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
// GFX11: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00]

image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7]
// GFX11: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] ; encoding: [0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00]
image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7]
// GFX11: image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7] ; encoding: [0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00]

image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16
// GFX11: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00]
image_bvh64_intersect_ray v[4:7], v[9:17], s[4:7] a16
// GFX11: image_bvh64_intersect_ray v[4:7], v[9:17], s[4:7] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00]

image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15]
// GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f]

@@ -1848,7 +1848,7 @@ image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x0
image_sample_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

image_sample_d v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_d v5, v[1:9], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

image_sample_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1

@@ -1947,7 +1947,7 @@ image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_d_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -2619,7 +2619,7 @@ image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_c_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_d v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -2712,7 +2712,7 @@ image_sample_c_d_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xac,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_d_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xac,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -3381,7 +3381,7 @@ image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_d_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -3474,7 +3474,7 @@ image_sample_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xcc,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xcc,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -4137,7 +4137,7 @@ image_sample_c_d_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xe8,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_d_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xe8,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -4230,7 +4230,7 @@ image_sample_c_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xec,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xec,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6069,7 +6069,7 @@ image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x0
image_sample_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_cd v5, v[1:9], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1

@@ -6168,7 +6168,7 @@ image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_cd_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6264,7 +6264,7 @@ image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_c_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_cd v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6357,7 +6357,7 @@ image_sample_c_cd_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xac,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_cd_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xac,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6453,7 +6453,7 @@ image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_cd_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6546,7 +6546,7 @@ image_sample_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xb4,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_cd_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xb4,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6639,7 +6639,7 @@ image_sample_c_cd_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xb8,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_cd_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xb8,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6732,7 +6732,7 @@ image_sample_c_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xbc,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_cd_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xbc,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -1773,7 +1773,7 @@ image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x0
image_sample_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

image_sample_d v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_d v5, v[1:9], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

image_sample_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1

@@ -1875,7 +1875,7 @@ image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_d_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -2568,7 +2568,7 @@ image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_c_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_d v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -2664,7 +2664,7 @@ image_sample_c_d_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xac,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_d_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xac,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -3354,7 +3354,7 @@ image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_d_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -3450,7 +3450,7 @@ image_sample_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xcc,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xcc,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -4134,7 +4134,7 @@ image_sample_c_d_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xe8,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_d_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xe8,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -4230,7 +4230,7 @@ image_sample_c_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xec,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xec,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6156,7 +6156,7 @@ image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x0
image_sample_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_cd v5, v[1:9], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1

@@ -6258,7 +6258,7 @@ image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_cd_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6357,7 +6357,7 @@ image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_c_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_cd v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6453,7 +6453,7 @@ image_sample_c_cd_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xac,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_cd_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xac,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6552,7 +6552,7 @@ image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_cd_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6648,7 +6648,7 @@ image_sample_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xb4,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_cd_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xb4,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6744,7 +6744,7 @@ image_sample_c_cd_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xb8,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_cd_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xb8,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6840,7 +6840,7 @@ image_sample_c_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xbc,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_cd_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xbc,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -1851,7 +1851,7 @@ image_sample_d v5, v[1:3], s[8:15], s[12:15] dmask:0x0
image_sample_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

image_sample_d v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_d v5, v[1:9], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0x88,0xf0,0x01,0x05,0x62,0x00]

image_sample_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1

@@ -1956,7 +1956,7 @@ image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_d_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0x8c,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -2667,7 +2667,7 @@ image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_c_d v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_d v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa8,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -2766,7 +2766,7 @@ image_sample_c_d_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xac,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_d_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xac,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -3477,7 +3477,7 @@ image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_d_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xc8,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -3576,7 +3576,7 @@ image_sample_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xcc,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xcc,0xf0,0x01,0x05,0x62,0x00]

image_sample_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -4278,7 +4278,7 @@ image_sample_c_d_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xe8,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_d_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xe8,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -4377,7 +4377,7 @@ image_sample_c_d_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xec,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xec,0xf0,0x01,0x05,0x62,0x00]

image_sample_c_d_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6399,7 +6399,7 @@ image_sample_cd v5, v[1:3], s[8:15], s[12:15] dmask:0x0
image_sample_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_cd v5, v[1:9], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa0,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1

@@ -6504,7 +6504,7 @@ image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_cd_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa4,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_cl v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6606,7 +6606,7 @@ image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_c_cd v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_cd v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xa8,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6705,7 +6705,7 @@ image_sample_c_cd_cl v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xac,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_cd_cl v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xac,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_cl v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6807,7 +6807,7 @@ image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x0
image_sample_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_cd_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xb0,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_o v5, v[1:4], s[8:15], s[12:15] dmask:0x1 unorm

@@ -6906,7 +6906,7 @@ image_sample_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xb4,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_cd_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xb4,0xf1,0x01,0x05,0x62,0x00]

image_sample_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -7005,7 +7005,7 @@ image_sample_c_cd_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xb8,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_cd_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xb8,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -7104,7 +7104,7 @@ image_sample_c_cd_cl_o v[5:8], v[1:8], s[8:15], s[12:15] dmask:0xf
image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x0
// CHECK: [0x00,0x00,0xbc,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x1
image_sample_c_cd_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x1
// CHECK: [0x00,0x01,0xbc,0xf1,0x01,0x05,0x62,0x00]

image_sample_c_cd_cl_o v5, v[1:8], s[8:15], s[12:15] dmask:0x1 unorm

@@ -75,16 +75,16 @@
# GFX10: v_fmac_legacy_f32_e64 v0, s1, 2.0
0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00

# GFX10: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
# GFX10: image_bvh_intersect_ray v[4:7], v[9:19], s[4:7]
0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00

# GFX10: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40

# GFX10: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7]
# GFX10: image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7]
0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x00

# GFX10: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16
# GFX10: image_bvh64_intersect_ray v[4:7], v[9:17], s[4:7] a16
0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x40

# GFX10: image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15]

@@ -1248,22 +1248,22 @@
# GFX11: image_atomic_xor v[254:255], v[254:255], ttmp[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc dlc a16 lwe ; encoding: [0x98,0x73,0x51,0xf0,0xfe,0xfe,0x5d,0x00]
0x98,0x73,0x51,0xf0,0xfe,0xfe,0x5d,0x00

# GFX11: image_bvh64_intersect_ray v[5:8], v[1:16], s[8:11] ; encoding: [0x80,0x8f,0x68,0xf0,0x01,0x05,0x02,0x00]
# GFX11: image_bvh64_intersect_ray v[5:8], v[1:12], s[8:11] ; encoding: [0x80,0x8f,0x68,0xf0,0x01,0x05,0x02,0x00]
0x80,0x8f,0x68,0xf0,0x01,0x05,0x02,0x00

# GFX11: image_bvh64_intersect_ray v[5:8], v[240:255], s[8:11] ; encoding: [0x80,0x8f,0x68,0xf0,0xf0,0x05,0x02,0x00]
# GFX11: image_bvh64_intersect_ray v[5:8], v[240:251], s[8:11] ; encoding: [0x80,0x8f,0x68,0xf0,0xf0,0x05,0x02,0x00]
0x80,0x8f,0x68,0xf0,0xf0,0x05,0x02,0x00

# GFX11: image_bvh64_intersect_ray v[5:8], v[1:16], s[100:103] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x01,0x05,0x19,0x00]
# GFX11: image_bvh64_intersect_ray v[5:8], v[1:9], s[100:103] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x01,0x05,0x19,0x00]
0x80,0x8f,0x69,0xf0,0x01,0x05,0x19,0x00

# GFX11: image_bvh64_intersect_ray v[252:255], v[240:255], ttmp[12:15] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0xf0,0xfc,0x1e,0x00]
# GFX11: image_bvh64_intersect_ray v[252:255], v[240:248], ttmp[12:15] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0xf0,0xfc,0x1e,0x00]
0x80,0x8f,0x69,0xf0,0xf0,0xfc,0x1e,0x00

# GFX11: image_bvh_intersect_ray v[5:8], v[1:16], s[8:11] ; encoding: [0x80,0x8f,0x64,0xf0,0x01,0x05,0x02,0x00]
# GFX11: image_bvh_intersect_ray v[5:8], v[1:11], s[8:11] ; encoding: [0x80,0x8f,0x64,0xf0,0x01,0x05,0x02,0x00]
0x80,0x8f,0x64,0xf0,0x01,0x05,0x02,0x00

# GFX11: image_bvh_intersect_ray v[5:8], v[240:255], s[8:11] ; encoding: [0x80,0x8f,0x64,0xf0,0xf0,0x05,0x02,0x00]
# GFX11: image_bvh_intersect_ray v[5:8], v[240:250], s[8:11] ; encoding: [0x80,0x8f,0x64,0xf0,0xf0,0x05,0x02,0x00]
0x80,0x8f,0x64,0xf0,0xf0,0x05,0x02,0x00

# GFX11: image_bvh_intersect_ray v[5:8], v[1:8], s[100:103] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x01,0x05,0x19,0x00]

@@ -3264,16 +3264,16 @@
# GFX11: image_sample_c_d v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x85,0xf0,0xfc,0x05,0x02,0x0c]
0x00,0x03,0x85,0xf0,0xfc,0x05,0x02,0x0c

# GFX11: image_sample_c_d v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]
# GFX11: image_sample_c_d v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]
0x08,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c

# GFX11: image_sample_c_d v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x84,0xf0,0xf0,0x05,0x02,0x0c]
# GFX11: image_sample_c_d v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x84,0xf0,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x84,0xf0,0xf0,0x05,0x02,0x0c

# GFX11: image_sample_c_d v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c]
# GFX11: image_sample_c_d v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c]
0x08,0x03,0x85,0xf0,0x01,0x05,0x02,0x0c

# GFX11: image_sample_c_d v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x85,0xf0,0xf0,0x05,0x02,0x0c]
# GFX11: image_sample_c_d v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x85,0xf0,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x85,0xf0,0xf0,0x05,0x02,0x0c

# GFX11: image_sample_c_d v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x84,0xf0,0x01,0x05,0x02,0x0c]

@@ -3336,16 +3336,16 @@
# GFX11: image_sample_c_d_cl v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x11,0xf1,0xfc,0x05,0x02,0x0c]
0x00,0x03,0x11,0xf1,0xfc,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl v[5:6], v[1:11], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]
0x08,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x10,0xf1,0xf0,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl v[5:6], v[240:250], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x10,0xf1,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x10,0xf1,0xf0,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c]
0x08,0x03,0x11,0xf1,0x01,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x11,0xf1,0xf0,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x11,0xf1,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x11,0xf1,0xf0,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x10,0xf1,0x01,0x05,0x02,0x0c]

@@ -3360,10 +3360,10 @@
# GFX11: image_sample_c_d_cl v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x11,0xf1,0xf9,0xfd,0x22,0x0c]
0x04,0x03,0x11,0xf1,0xf9,0xfd,0x22,0x0c

# GFX11: image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x12,0xf1,0x01,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl v5, v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x12,0xf1,0x01,0x05,0x02,0x0c]
0x0c,0x03,0x12,0xf1,0x01,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x12,0xf1,0xf0,0xff,0x02,0x0c]
# GFX11: image_sample_c_d_cl v255, v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x12,0xf1,0xf0,0xff,0x02,0x0c]
0x0c,0x03,0x12,0xf1,0xf0,0xff,0x02,0x0c

# GFX11: image_sample_c_d_cl v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x13,0xf1,0x01,0x05,0x22,0x0c]

@@ -3384,10 +3384,10 @@
# GFX11: image_sample_c_d_cl v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x11,0xf1,0xfb,0xfe,0x22,0x0c]
0x10,0x04,0x11,0xf1,0xfb,0xfe,0x22,0x0c

# GFX11: image_sample_c_d_cl v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x12,0xf1,0x01,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl v5, v[1:9], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x12,0xf1,0x01,0x05,0x02,0x0c]
0x14,0x04,0x12,0xf1,0x01,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x12,0xf1,0xf0,0xff,0x02,0x0c]
# GFX11: image_sample_c_d_cl v255, v[240:248], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x12,0xf1,0xf0,0xff,0x02,0x0c]
0x14,0x04,0x12,0xf1,0xf0,0xff,0x02,0x0c

# GFX11: image_sample_c_d_cl v[5:6], v[1:7], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16 ; encoding: [0x14,0x04,0x13,0xf1,0x01,0x05,0x38,0x64]

@@ -3408,10 +3408,10 @@
# GFX11: image_sample_c_d_cl_g16 v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x51,0xf1,0xfc,0x05,0x02,0x0c]
0x00,0x03,0x51,0xf1,0xfc,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl_g16 v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c]
0x08,0x03,0x50,0xf1,0x01,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x50,0xf1,0xf0,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl_g16 v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x50,0xf1,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x50,0xf1,0xf0,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl_g16 v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x51,0xf1,0x01,0x05,0x02,0x0c]

@@ -3480,22 +3480,22 @@
# GFX11: image_sample_c_d_cl_o v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x29,0xf1,0xfb,0x05,0x02,0x0c]
0x00,0x03,0x29,0xf1,0xfb,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl_o v[5:6], v[1:12], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]
0x08,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x28,0xf1,0xf0,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl_o v[5:6], v[240:251], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x28,0xf1,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x28,0xf1,0xf0,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl_o v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c]
0x08,0x03,0x29,0xf1,0x01,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x29,0xf1,0xf0,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl_o v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x29,0xf1,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x29,0xf1,0xf0,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]
# GFX11: image_sample_c_d_cl_o v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c]
0x04,0x03,0x28,0xf1,0x01,0x05,0x02,0x0c

# GFX11: image_sample_c_d_cl_o v[254:255], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x28,0xf1,0xf0,0xfe,0x02,0x0c]
# GFX11: image_sample_c_d_cl_o v[254:255], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x28,0xf1,0xf0,0xfe,0x02,0x0c]
0x04,0x03,0x28,0xf1,0xf0,0xfe,0x02,0x0c

# GFX11: image_sample_c_d_cl_o v[5:7], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x29,0xf1,0x01,0x05,0x22,0x0c]

@@ -3504,10 +3504,10 @@
# GFX11: image_sample_c_d_cl_o v[253:255], v[248:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x29,0xf1,0xf8,0xfd,0x22,0x0c]
0x04,0x03,0x29,0xf1,0xf8,0xfd,0x22,0x0c
-# GFX11: image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x2a,0xf1,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x2a,0xf1,0x01,0x05,0x02,0x0c]
0x0c,0x03,0x2a,0xf1,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_c_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x2a,0xf1,0xf0,0xff,0x02,0x0c]
+# GFX11: image_sample_c_d_cl_o v255, v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x2a,0xf1,0xf0,0xff,0x02,0x0c]
0x0c,0x03,0x2a,0xf1,0xf0,0xff,0x02,0x0c
# GFX11: image_sample_c_d_cl_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x2b,0xf1,0x01,0x05,0x22,0x0c]
@@ -3528,10 +3528,10 @@
# GFX11: image_sample_c_d_cl_o v[254:255], v[250:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x29,0xf1,0xfa,0xfe,0x22,0x0c]
0x10,0x04,0x29,0xf1,0xfa,0xfe,0x22,0x0c
-# GFX11: image_sample_c_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x2a,0xf1,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_c_d_cl_o v5, v[1:10], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x2a,0xf1,0x01,0x05,0x02,0x0c]
0x14,0x04,0x2a,0xf1,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_c_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x2a,0xf1,0xf0,0xff,0x02,0x0c]
+# GFX11: image_sample_c_d_cl_o v255, v[240:249], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x2a,0xf1,0xf0,0xff,0x02,0x0c]
0x14,0x04,0x2a,0xf1,0xf0,0xff,0x02,0x0c
# GFX11: image_sample_c_d_cl_o v[5:6], v[1:8], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16 ; encoding: [0x14,0x04,0x2b,0xf1,0x01,0x05,0x38,0x64]
@@ -3552,10 +3552,10 @@
# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x59,0xf1,0xfb,0x05,0x02,0x0c]
0x00,0x03,0x59,0xf1,0xfb,0x05,0x02,0x0c
-# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c]
0x08,0x03,0x58,0xf1,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x58,0xf1,0xf0,0x05,0x02,0x0c]
+# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x58,0xf1,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x58,0xf1,0xf0,0x05,0x02,0x0c
# GFX11: image_sample_c_d_cl_o_g16 v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x59,0xf1,0x01,0x05,0x02,0x0c]
@@ -3696,16 +3696,16 @@
# GFX11: image_sample_c_d_o v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0xad,0xf0,0xfb,0x05,0x02,0x0c]
0x00,0x03,0xad,0xf0,0xfb,0x05,0x02,0x0c
-# GFX11: image_sample_c_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_c_d_o v[5:6], v[1:11], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c]
0x08,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_c_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xac,0xf0,0xf0,0x05,0x02,0x0c]
+# GFX11: image_sample_c_d_o v[5:6], v[240:250], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xac,0xf0,0xf0,0x05,0x02,0x0c]
0x08,0x03,0xac,0xf0,0xf0,0x05,0x02,0x0c
-# GFX11: image_sample_c_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_c_d_o v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c]
0x08,0x03,0xad,0xf0,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_c_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xad,0xf0,0xf0,0x05,0x02,0x0c]
+# GFX11: image_sample_c_d_o v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xad,0xf0,0xf0,0x05,0x02,0x0c]
0x08,0x03,0xad,0xf0,0xf0,0x05,0x02,0x0c
# GFX11: image_sample_c_d_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0xac,0xf0,0x01,0x05,0x02,0x0c]
@@ -3720,10 +3720,10 @@
# GFX11: image_sample_c_d_o v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0xad,0xf0,0xf9,0xfd,0x22,0x0c]
0x04,0x03,0xad,0xf0,0xf9,0xfd,0x22,0x0c
-# GFX11: image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0xae,0xf0,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_c_d_o v5, v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0xae,0xf0,0x01,0x05,0x02,0x0c]
0x0c,0x03,0xae,0xf0,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_c_d_o v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0xae,0xf0,0xf0,0xff,0x02,0x0c]
+# GFX11: image_sample_c_d_o v255, v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0xae,0xf0,0xf0,0xff,0x02,0x0c]
0x0c,0x03,0xae,0xf0,0xf0,0xff,0x02,0x0c
# GFX11: image_sample_c_d_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0xaf,0xf0,0x01,0x05,0x22,0x0c]
@@ -3744,10 +3744,10 @@
# GFX11: image_sample_c_d_o v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0xad,0xf0,0xfb,0xfe,0x22,0x0c]
0x10,0x04,0xad,0xf0,0xfb,0xfe,0x22,0x0c
-# GFX11: image_sample_c_d_o v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0xae,0xf0,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_c_d_o v5, v[1:9], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0xae,0xf0,0x01,0x05,0x02,0x0c]
0x14,0x04,0xae,0xf0,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_c_d_o v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0xae,0xf0,0xf0,0xff,0x02,0x0c]
+# GFX11: image_sample_c_d_o v255, v[240:248], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0xae,0xf0,0xf0,0xff,0x02,0x0c]
0x14,0x04,0xae,0xf0,0xf0,0xff,0x02,0x0c
# GFX11: image_sample_c_d_o v[5:6], v[1:8], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16 ; encoding: [0x14,0x04,0xaf,0xf0,0x01,0x05,0x38,0x64]
@@ -3768,10 +3768,10 @@
# GFX11: image_sample_c_d_o_g16 v[5:6], v[251:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0xf1,0xf0,0xfb,0x05,0x02,0x0c]
0x00,0x03,0xf1,0xf0,0xfb,0x05,0x02,0x0c
-# GFX11: image_sample_c_d_o_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_c_d_o_g16 v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c]
0x08,0x03,0xf0,0xf0,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_c_d_o_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xf0,0xf0,0xf0,0x05,0x02,0x0c]
+# GFX11: image_sample_c_d_o_g16 v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0xf0,0xf0,0xf0,0x05,0x02,0x0c]
0x08,0x03,0xf0,0xf0,0xf0,0x05,0x02,0x0c
# GFX11: image_sample_c_d_o_g16 v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0xf1,0xf0,0x01,0x05,0x02,0x0c]
@@ -4344,10 +4344,10 @@
# GFX11: image_sample_d v[5:6], v[253:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x71,0xf0,0xfd,0x05,0x02,0x0c]
0x00,0x03,0x71,0xf0,0xfd,0x05,0x02,0x0c
-# GFX11: image_sample_d v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x70,0xf0,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_d v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x70,0xf0,0x01,0x05,0x02,0x0c]
0x08,0x03,0x70,0xf0,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_d v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x70,0xf0,0xf0,0x05,0x02,0x0c]
+# GFX11: image_sample_d v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x70,0xf0,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x70,0xf0,0xf0,0x05,0x02,0x0c
# GFX11: image_sample_d v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x71,0xf0,0x01,0x05,0x02,0x0c]
@@ -4416,10 +4416,10 @@
# GFX11: image_sample_d_cl v[5:6], v[253:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x05,0xf1,0xfd,0x05,0x02,0x0c]
0x00,0x03,0x05,0xf1,0xfd,0x05,0x02,0x0c
-# GFX11: image_sample_d_cl v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x04,0xf1,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_d_cl v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x04,0xf1,0x01,0x05,0x02,0x0c]
0x08,0x03,0x04,0xf1,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_d_cl v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x04,0xf1,0xf0,0x05,0x02,0x0c]
+# GFX11: image_sample_d_cl v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x04,0xf1,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x04,0xf1,0xf0,0x05,0x02,0x0c
# GFX11: image_sample_d_cl v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x05,0xf1,0x01,0x05,0x02,0x0c]
@@ -4560,16 +4560,16 @@
# GFX11: image_sample_d_cl_o v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x1d,0xf1,0xfc,0x05,0x02,0x0c]
0x00,0x03,0x1d,0xf1,0xfc,0x05,0x02,0x0c
-# GFX11: image_sample_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_d_cl_o v[5:6], v[1:11], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]
0x08,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x1c,0xf1,0xf0,0x05,0x02,0x0c]
+# GFX11: image_sample_d_cl_o v[5:6], v[240:250], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x1c,0xf1,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x1c,0xf1,0xf0,0x05,0x02,0x0c
-# GFX11: image_sample_d_cl_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_d_cl_o v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c]
0x08,0x03,0x1d,0xf1,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_d_cl_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x1d,0xf1,0xf0,0x05,0x02,0x0c]
+# GFX11: image_sample_d_cl_o v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x1d,0xf1,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x1d,0xf1,0xf0,0x05,0x02,0x0c
# GFX11: image_sample_d_cl_o v[5:6], v[1:8], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x1c,0xf1,0x01,0x05,0x02,0x0c]
@@ -4584,10 +4584,10 @@
# GFX11: image_sample_d_cl_o v[253:255], v[249:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x04,0x03,0x1d,0xf1,0xf9,0xfd,0x22,0x0c]
0x04,0x03,0x1d,0xf1,0xf9,0xfd,0x22,0x0c
-# GFX11: image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x1e,0xf1,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_d_cl_o v5, v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x1e,0xf1,0x01,0x05,0x02,0x0c]
0x0c,0x03,0x1e,0xf1,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x1e,0xf1,0xf0,0xff,0x02,0x0c]
+# GFX11: image_sample_d_cl_o v255, v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE d16 ; encoding: [0x0c,0x03,0x1e,0xf1,0xf0,0xff,0x02,0x0c]
0x0c,0x03,0x1e,0xf1,0xf0,0xff,0x02,0x0c
# GFX11: image_sample_d_cl_o v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_CUBE a16 tfe d16 ; encoding: [0x0c,0x03,0x1f,0xf1,0x01,0x05,0x22,0x0c]
@@ -4608,10 +4608,10 @@
# GFX11: image_sample_d_cl_o v[254:255], v[251:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY a16 tfe ; encoding: [0x10,0x04,0x1d,0xf1,0xfb,0xfe,0x22,0x0c]
0x10,0x04,0x1d,0xf1,0xfb,0xfe,0x22,0x0c
-# GFX11: image_sample_d_cl_o v5, v[1:16], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x1e,0xf1,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_d_cl_o v5, v[1:9], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x1e,0xf1,0x01,0x05,0x02,0x0c]
0x14,0x04,0x1e,0xf1,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_d_cl_o v255, v[240:255], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x1e,0xf1,0xf0,0xff,0x02,0x0c]
+# GFX11: image_sample_d_cl_o v255, v[240:248], s[8:15], s[12:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY d16 ; encoding: [0x14,0x04,0x1e,0xf1,0xf0,0xff,0x02,0x0c]
0x14,0x04,0x1e,0xf1,0xf0,0xff,0x02,0x0c
# GFX11: image_sample_d_cl_o v[5:6], v[1:7], s[96:103], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 tfe d16 ; encoding: [0x14,0x04,0x1f,0xf1,0x01,0x05,0x38,0x64]
@@ -4632,10 +4632,10 @@
# GFX11: image_sample_d_cl_o_g16 v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x55,0xf1,0xfc,0x05,0x02,0x0c]
0x00,0x03,0x55,0xf1,0xfc,0x05,0x02,0x0c
-# GFX11: image_sample_d_cl_o_g16 v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_d_cl_o_g16 v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c]
0x08,0x03,0x54,0xf1,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_d_cl_o_g16 v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x54,0xf1,0xf0,0x05,0x02,0x0c]
+# GFX11: image_sample_d_cl_o_g16 v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x54,0xf1,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x54,0xf1,0xf0,0x05,0x02,0x0c
# GFX11: image_sample_d_cl_o_g16 v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x55,0xf1,0x01,0x05,0x02,0x0c]
@@ -4776,16 +4776,16 @@
# GFX11: image_sample_d_o v[5:6], v[252:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x03,0x99,0xf0,0xfc,0x05,0x02,0x0c]
0x00,0x03,0x99,0xf0,0xfc,0x05,0x02,0x0c
-# GFX11: image_sample_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_d_o v[5:6], v[1:10], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]
0x08,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x98,0xf0,0xf0,0x05,0x02,0x0c]
+# GFX11: image_sample_d_o v[5:6], v[240:249], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x03,0x98,0xf0,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x98,0xf0,0xf0,0x05,0x02,0x0c
-# GFX11: image_sample_d_o v[5:6], v[1:16], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c]
+# GFX11: image_sample_d_o v[5:6], v[1:9], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c]
0x08,0x03,0x99,0xf0,0x01,0x05,0x02,0x0c
-# GFX11: image_sample_d_o v[5:6], v[240:255], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x99,0xf0,0xf0,0x05,0x02,0x0c]
+# GFX11: image_sample_d_o v[5:6], v[240:248], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_3D a16 ; encoding: [0x08,0x03,0x99,0xf0,0xf0,0x05,0x02,0x0c]
0x08,0x03,0x99,0xf0,0xf0,0x05,0x02,0x0c
# GFX11: image_sample_d_o v[5:6], v[1:7], s[8:15], s[12:15] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x03,0x98,0xf0,0x01,0x05,0x02,0x0c]
@@ -162,7 +162,7 @@
# GFX11: image_sample_d v[64:66], v[32:37], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]
0x04,0x07,0x70,0xf0,0x20,0x40,0x01,0x64
-# GFX11: image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]
+# GFX11: image_sample_d v[64:66], v[32:40], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64]
0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64
# GFX11: image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00]
@@ -282,16 +282,16 @@
# GFX11: image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1d,0x01,0x60,0xf0,0xcc,0x0a,0x0a,0x00,0x0b,0x0e,0x13,0x00]
0x1d,0x01,0x60,0xf0,0xcc,0x0a,0x0a,0x00,0x0b,0x0e,0x13,0x00
-# GFX11: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00]
+# GFX11: image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00]
0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00
# GFX11: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00]
0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00
-# GFX11: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] ; encoding: [0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00]
+# GFX11: image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7] ; encoding: [0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00]
0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00
-# GFX11: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00]
+# GFX11: image_bvh64_intersect_ray v[4:7], v[9:17], s[4:7] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00]
0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00
# GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f]
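
The pattern across these test updates is uniform: the disassembler previously had to round an image instruction's address operand up to the next register class it knew about, so any operand needing 9 to 12 VGPRs printed as a 16-register range (v[1:16] style), and with the 288/320/352/384-bit classes it can now print exactly v[1:9], v[1:10], v[1:11], or v[1:12]. Below is a minimal sketch of that rounding rule, not LLVM's actual API; roundToRegClass and HasNewClasses are hypothetical names introduced only for illustration.

#include <array>
#include <cstdio>

// Sketch: round a dword count up to the smallest register-tuple size
// that has a dedicated class. 9-12 dwords (288/320/352/384 bits) are
// the sizes this patch adds; 16 (512-bit) was previously the smallest
// class above 8.
static unsigned roundToRegClass(unsigned DWords, bool HasNewClasses) {
  static constexpr std::array<unsigned, 14> Sizes = {
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 32};
  for (unsigned S : Sizes) {
    if (!HasNewClasses && S >= 9 && S <= 12)
      continue; // these tuple sizes had no class before the patch
    if (S >= DWords)
      return S;
  }
  return 0; // wider than any class; not expected in these tests
}

int main() {
  // image_sample_d ... dim:SQ_RSRC_IMG_3D takes 9 address dwords:
  // the old check line expected v[1:16], the new one expects v[1:9].
  std::printf("%u -> %u\n", roundToRegClass(9, false),
              roundToRegClass(9, true)); // prints "16 -> 9"
}

The same rule accounts for every narrowed range in these hunks: 10-dword operands now print as 320-bit tuples, 11-dword as 352-bit (image_bvh_intersect_ray v[9:19]), and 12-dword as 384-bit (image_bvh64_intersect_ray v[9:20]).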