forked from OSchip/llvm-project
[AMDGPU] Add maximum NSA size limit ISA feature
Add maximum NSA size limit as an ISA feature. Use this to reduce NSA usage on GFX10.1 to avoid stability issues with 4 and 5 dwords NSA instructions. Maintain use of longer NSA instructions on GFX10.3. Note: this also contains some minor fixes for GlobalISel which did not work correctly with non-NSA form instructions on GFX10. Reviewed By: foad Differential Revision: https://reviews.llvm.org/D103348
This commit is contained in:
parent
a7767171cb
commit
7d4baf25aa
|
@ -629,6 +629,16 @@ def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
|
|||
"Does not need SW waitstates"
|
||||
>;
|
||||
|
||||
class SubtargetFeatureNSAMaxSize <int Value> : SubtargetFeature <
|
||||
"nsa-max-size-"#Value,
|
||||
"NSAMaxSize",
|
||||
!cast<string>(Value),
|
||||
"The maximum non-sequential address size in VGPRs."
|
||||
>;
|
||||
|
||||
def FeatureNSAMaxSize5 : SubtargetFeatureNSAMaxSize<5>;
|
||||
def FeatureNSAMaxSize13 : SubtargetFeatureNSAMaxSize<13>;
|
||||
|
||||
//===------------------------------------------------------------===//
|
||||
// Subtarget Features (options and debugging)
|
||||
//===------------------------------------------------------------===//
|
||||
|
@ -1031,6 +1041,7 @@ def FeatureISAVersion10_1_0 : FeatureSet<
|
|||
FeatureLDSBankCount32,
|
||||
FeatureDLInsts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureNSAMaxSize5,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureScalarStores,
|
||||
FeatureScalarAtomics,
|
||||
|
@ -1052,6 +1063,7 @@ def FeatureISAVersion10_1_1 : FeatureSet<
|
|||
FeatureDot6Insts,
|
||||
FeatureDot7Insts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureNSAMaxSize5,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureScalarStores,
|
||||
FeatureScalarAtomics,
|
||||
|
@ -1073,6 +1085,7 @@ def FeatureISAVersion10_1_2 : FeatureSet<
|
|||
FeatureDot6Insts,
|
||||
FeatureDot7Insts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureNSAMaxSize5,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureScalarStores,
|
||||
FeatureScalarAtomics,
|
||||
|
@ -1090,6 +1103,7 @@ def FeatureISAVersion10_1_3 : FeatureSet<
|
|||
FeatureLDSBankCount32,
|
||||
FeatureDLInsts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureNSAMaxSize5,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureScalarStores,
|
||||
FeatureScalarAtomics,
|
||||
|
@ -1113,6 +1127,7 @@ def FeatureISAVersion10_3_0 : FeatureSet<
|
|||
FeatureDot6Insts,
|
||||
FeatureDot7Insts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureNSAMaxSize13,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureShaderCyclesRegister]>;
|
||||
|
||||
|
|
|
@ -4153,12 +4153,11 @@ static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI,
|
|||
|
||||
Register AddrReg = SrcOp.getReg();
|
||||
|
||||
if (I < Intr->GradientStart) {
|
||||
AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);
|
||||
PackedAddrs.push_back(AddrReg);
|
||||
} else if ((I >= Intr->GradientStart && I < Intr->CoordStart && !IsG16) ||
|
||||
(I >= Intr->CoordStart && !IsA16)) {
|
||||
if ((I < Intr->GradientStart) ||
|
||||
(I >= Intr->GradientStart && I < Intr->CoordStart && !IsG16) ||
|
||||
(I >= Intr->CoordStart && !IsA16)) {
|
||||
// Handle any gradient or coordinate operands that should not be packed
|
||||
AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);
|
||||
PackedAddrs.push_back(AddrReg);
|
||||
} else {
|
||||
// Dz/dh, dz/dv and the last odd coord are packed with undef. Also, in 1D,
|
||||
|
@ -4202,9 +4201,8 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,
|
|||
|
||||
int NumAddrRegs = AddrRegs.size();
|
||||
if (NumAddrRegs != 1) {
|
||||
// Round up to 8 elements for v5-v7
|
||||
// FIXME: Missing intermediate sized register classes and instructions.
|
||||
if (NumAddrRegs > 4 && !isPowerOf2_32(NumAddrRegs)) {
|
||||
// Above 8 elements round up to next power of 2 (i.e. 16).
|
||||
if (NumAddrRegs > 8 && !isPowerOf2_32(NumAddrRegs)) {
|
||||
const int RoundedNumRegs = NextPowerOf2(NumAddrRegs);
|
||||
auto Undef = B.buildUndef(S32);
|
||||
AddrRegs.append(RoundedNumRegs - NumAddrRegs, Undef.getReg(0));
|
||||
|
@ -4375,7 +4373,8 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
|
|||
IsG16);
|
||||
|
||||
// See also below in the non-a16 branch
|
||||
const bool UseNSA = PackedRegs.size() >= 3 && ST.hasNSAEncoding();
|
||||
const bool UseNSA = ST.hasNSAEncoding() && PackedRegs.size() >= 3 &&
|
||||
PackedRegs.size() <= ST.getNSAMaxSize();
|
||||
|
||||
if (!UseNSA && PackedRegs.size() > 1) {
|
||||
LLT PackedAddrTy = LLT::fixed_vector(2 * PackedRegs.size(), 16);
|
||||
|
@ -4412,7 +4411,8 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
|
|||
//
|
||||
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
|
||||
// allocation when possible.
|
||||
const bool UseNSA = CorrectedNumVAddrs >= 3 && ST.hasNSAEncoding();
|
||||
const bool UseNSA = ST.hasNSAEncoding() && CorrectedNumVAddrs >= 3 &&
|
||||
CorrectedNumVAddrs <= ST.getNSAMaxSize();
|
||||
|
||||
if (!UseNSA && Intr->NumVAddrs > 1)
|
||||
convertImageAddrToPacked(B, MI, ArgOffset + Intr->VAddrStart,
|
||||
|
|
|
@ -264,6 +264,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
|||
HasGFX10A16(false),
|
||||
HasG16(false),
|
||||
HasNSAEncoding(false),
|
||||
NSAMaxSize(0),
|
||||
GFX10_AEncoding(false),
|
||||
GFX10_BEncoding(false),
|
||||
HasDLInsts(false),
|
||||
|
|
|
@ -136,6 +136,7 @@ protected:
|
|||
bool HasGFX10A16;
|
||||
bool HasG16;
|
||||
bool HasNSAEncoding;
|
||||
unsigned NSAMaxSize;
|
||||
bool GFX10_AEncoding;
|
||||
bool GFX10_BEncoding;
|
||||
bool HasDLInsts;
|
||||
|
@ -878,6 +879,8 @@ public:
|
|||
|
||||
bool hasNSAEncoding() const { return HasNSAEncoding; }
|
||||
|
||||
unsigned getNSAMaxSize() const { return NSAMaxSize; }
|
||||
|
||||
bool hasGFX10_AEncoding() const {
|
||||
return GFX10_AEncoding;
|
||||
}
|
||||
|
|
|
@ -6211,8 +6211,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
|||
//
|
||||
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
|
||||
// allocation when possible.
|
||||
bool UseNSA =
|
||||
ST->hasFeature(AMDGPU::FeatureNSAEncoding) && VAddrs.size() >= 3;
|
||||
bool UseNSA = ST->hasFeature(AMDGPU::FeatureNSAEncoding) &&
|
||||
VAddrs.size() >= 3 &&
|
||||
VAddrs.size() <= (unsigned)ST->getNSAMaxSize();
|
||||
SDValue VAddr;
|
||||
if (!UseNSA)
|
||||
VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
|
||||
|
|
|
@ -1669,7 +1669,8 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg
|
|||
; GFX10: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32)
|
||||
; GFX10: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[DEF]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -3511,7 +3512,8 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32>
|
|||
; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
|
||||
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource")
|
||||
; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource")
|
||||
; GFX10: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
|
||||
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
main_body:
|
||||
|
@ -3606,7 +3608,8 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4
|
|||
; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
|
||||
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
|
|
@ -27,7 +27,8 @@ define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg
|
|||
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
|
||||
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -69,7 +70,9 @@ define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg
|
|||
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
|
||||
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32)
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -119,7 +122,11 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg
|
|||
; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
|
||||
; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32)
|
||||
; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32)
|
||||
; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -159,7 +166,8 @@ define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inr
|
|||
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
|
||||
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -203,7 +211,9 @@ define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
|
|||
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
|
||||
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
|
||||
; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -242,7 +252,9 @@ define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in
|
|||
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
|
||||
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32)
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -285,7 +297,10 @@ define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in
|
|||
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
|
||||
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32)
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
|
||||
; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -326,7 +341,9 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32>
|
|||
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
|
||||
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[COPY16]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32)
|
||||
; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -371,7 +388,11 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
|
|||
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
|
||||
; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
|
||||
; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
|
||||
; GFX10: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32)
|
||||
; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -409,7 +430,8 @@ define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
|
||||
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -451,7 +473,9 @@ define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
|
|||
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
|
||||
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32)
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -491,7 +515,8 @@ define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> in
|
|||
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
|
||||
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -535,7 +560,9 @@ define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> in
|
|||
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
|
||||
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
|
||||
; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -574,7 +601,9 @@ define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> i
|
|||
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
|
||||
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32)
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -617,7 +646,10 @@ define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i
|
|||
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
|
||||
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32)
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
|
||||
; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -658,7 +690,9 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32>
|
|||
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
|
||||
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[COPY16]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32)
|
||||
; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -703,7 +737,11 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
|
|||
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
|
||||
; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
|
||||
; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
|
||||
; GFX10: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32)
|
||||
; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
@ -750,7 +788,11 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32>
|
|||
; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
|
||||
; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
|
||||
; GFX10: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32)
|
||||
; GFX10: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32)
|
||||
; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32) from custom "ImageResource")
|
||||
; GFX10: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
|
||||
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
main_body:
|
||||
|
@ -793,7 +835,11 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4
|
|||
; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
|
||||
; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
|
||||
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
|
||||
; GFX10: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32)
|
||||
; GFX10: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32)
|
||||
; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
|
||||
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>) from custom "ImageResource")
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
|
||||
; GFX10: $vgpr0 = COPY [[UV]](s32)
|
||||
; GFX10: $vgpr1 = COPY [[UV1]](s32)
|
||||
|
|
|
@ -505,7 +505,7 @@ define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
|
|||
; GFX6-NEXT: s_mov_b32 s11, s13
|
||||
; GFX6-NEXT: s_wqm_b64 exec, exec
|
||||
; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
|
||||
; GFX6-NEXT: image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
|
||||
; GFX6-NEXT: image_gather4_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
|
|
@ -161,7 +161,7 @@ define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32>
|
|||
; GFX6-NEXT: s_mov_b32 s11, s13
|
||||
; GFX6-NEXT: s_wqm_b64 exec, exec
|
||||
; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
|
||||
; GFX6-NEXT: image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
|
||||
; GFX6-NEXT: image_gather4_c_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
@ -255,7 +255,7 @@ define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32>
|
|||
; GFX6-NEXT: s_mov_b32 s11, s13
|
||||
; GFX6-NEXT: s_wqm_b64 exec, exec
|
||||
; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
|
||||
; GFX6-NEXT: image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
|
||||
; GFX6-NEXT: image_gather4_c_b_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
@ -299,7 +299,7 @@ define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32>
|
|||
; GFX6-NEXT: s_mov_b32 s9, s11
|
||||
; GFX6-NEXT: s_mov_b32 s10, s12
|
||||
; GFX6-NEXT: s_mov_b32 s11, s13
|
||||
; GFX6-NEXT: image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
|
||||
; GFX6-NEXT: image_gather4_b_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
@ -343,7 +343,7 @@ define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i3
|
|||
; GFX6-NEXT: s_mov_b32 s11, s13
|
||||
; GFX6-NEXT: s_wqm_b64 exec, exec
|
||||
; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
|
||||
; GFX6-NEXT: image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
|
||||
; GFX6-NEXT: image_gather4_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0x1
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
@ -428,7 +428,7 @@ define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32>
|
|||
; GFX6-NEXT: s_mov_b32 s9, s11
|
||||
; GFX6-NEXT: s_mov_b32 s10, s12
|
||||
; GFX6-NEXT: s_mov_b32 s11, s13
|
||||
; GFX6-NEXT: image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
|
||||
; GFX6-NEXT: image_gather4_c_l_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
|
|
@ -35,15 +35,17 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
|
||||
; GFX10-LABEL: sample_d_3d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, 0xffff
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; GFX10-NEXT: s_lshl_b32 s12, s0, 16
|
||||
; GFX10-NEXT: v_and_or_b32 v0, v0, v9, v1
|
||||
; GFX10-NEXT: v_and_or_b32 v1, v2, v9, s12
|
||||
; GFX10-NEXT: v_and_or_b32 v2, v3, v9, v4
|
||||
; GFX10-NEXT: v_and_or_b32 v3, v5, v9, s12
|
||||
; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
|
||||
; GFX10-NEXT: v_and_or_b32 v3, v9, v11, s12
|
||||
; GFX10-NEXT: v_and_or_b32 v4, v10, v11, v4
|
||||
; GFX10-NEXT: v_and_or_b32 v2, v0, v11, v1
|
||||
; GFX10-NEXT: v_and_or_b32 v5, v5, v11, s12
|
||||
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -131,12 +133,15 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
|
||||
; GFX10-LABEL: sample_c_d_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
||||
; GFX10-NEXT: v_and_or_b32 v1, v1, v8, v2
|
||||
; GFX10-NEXT: v_and_or_b32 v2, v3, v8, v4
|
||||
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v8
|
||||
; GFX10-NEXT: v_and_or_b32 v4, v9, v0, v4
|
||||
; GFX10-NEXT: v_and_or_b32 v3, v1, v0, v3
|
||||
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -255,12 +260,15 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
|
||||
; GFX10-LABEL: sample_c_cd_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
||||
; GFX10-NEXT: v_and_or_b32 v1, v1, v8, v2
|
||||
; GFX10-NEXT: v_and_or_b32 v2, v3, v8, v4
|
||||
; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v8
|
||||
; GFX10-NEXT: v_and_or_b32 v4, v9, v0, v4
|
||||
; GFX10-NEXT: v_and_or_b32 v3, v1, v0, v3
|
||||
; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -271,12 +279,17 @@ main_body:
|
|||
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
|
||||
; GFX10-LABEL: sample_c_d_o_2darray_V1:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, v4
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
||||
; GFX10-NEXT: v_and_or_b32 v2, v2, v9, v3
|
||||
; GFX10-NEXT: v_and_or_b32 v3, v4, v9, v5
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v9
|
||||
; GFX10-NEXT: v_and_or_b32 v5, v11, v0, v5
|
||||
; GFX10-NEXT: v_and_or_b32 v4, v10, v0, v1
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -287,12 +300,17 @@ main_body:
|
|||
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
|
||||
; GFX10-LABEL: sample_c_d_o_2darray_V2:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, v4
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
||||
; GFX10-NEXT: v_and_or_b32 v2, v2, v9, v3
|
||||
; GFX10-NEXT: v_and_or_b32 v3, v4, v9, v5
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v9
|
||||
; GFX10-NEXT: v_and_or_b32 v5, v11, v0, v5
|
||||
; GFX10-NEXT: v_and_or_b32 v4, v10, v0, v1
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
|
|
@ -351,21 +351,28 @@ define amdgpu_ps void @cluster_image_sample(<8 x i32> inreg %src, <4 x i32> inre
|
|||
;
|
||||
; GFX10-LABEL: cluster_image_sample:
|
||||
; GFX10: ; %bb.0: ; %entry
|
||||
; GFX10-NEXT: v_cvt_f32_i32_e32 v2, v0
|
||||
; GFX10-NEXT: v_cvt_f32_i32_e32 v3, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v13, 0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX10-NEXT: v_cvt_f32_i32_e32 v8, v0
|
||||
; GFX10-NEXT: v_cvt_f32_i32_e32 v9, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, 1.0
|
||||
; GFX10-NEXT: v_add_f32_e32 v11, 1.0, v2
|
||||
; GFX10-NEXT: v_add_f32_e32 v12, 1.0, v3
|
||||
; GFX10-NEXT: v_add_f32_e32 v14, 2.0, v2
|
||||
; GFX10-NEXT: v_add_f32_e32 v15, 2.0, v3
|
||||
; GFX10-NEXT: image_sample_d v[2:5], [v11, v12, v13, v13, v13, v13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: image_sample_d v[6:9], [v14, v15, v10, v10, v10, v10], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: v_mov_b32_e32 v5, v4
|
||||
; GFX10-NEXT: v_add_f32_e32 v2, 1.0, v8
|
||||
; GFX10-NEXT: v_add_f32_e32 v3, 1.0, v9
|
||||
; GFX10-NEXT: v_mov_b32_e32 v6, v4
|
||||
; GFX10-NEXT: v_mov_b32_e32 v7, v4
|
||||
; GFX10-NEXT: v_add_f32_e32 v8, 2.0, v8
|
||||
; GFX10-NEXT: v_add_f32_e32 v9, 2.0, v9
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, v10
|
||||
; GFX10-NEXT: v_mov_b32_e32 v12, v10
|
||||
; GFX10-NEXT: v_mov_b32_e32 v13, v10
|
||||
; GFX10-NEXT: s_clause 0x1
|
||||
; GFX10-NEXT: image_sample_d v[14:17], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: image_sample_d v[18:21], v[8:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: v_add_f32_e32 v5, v5, v9
|
||||
; GFX10-NEXT: v_add_f32_e32 v4, v4, v8
|
||||
; GFX10-NEXT: v_add_f32_e32 v3, v3, v7
|
||||
; GFX10-NEXT: v_add_f32_e32 v2, v2, v6
|
||||
; GFX10-NEXT: v_add_f32_e32 v5, v17, v21
|
||||
; GFX10-NEXT: v_add_f32_e32 v4, v16, v20
|
||||
; GFX10-NEXT: v_add_f32_e32 v3, v15, v19
|
||||
; GFX10-NEXT: v_add_f32_e32 v2, v14, v18
|
||||
; GFX10-NEXT: image_store v[2:5], v[0:1], s[12:19] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
|
||||
; GFX10-NEXT: s_endpgm
|
||||
entry:
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-nsa-encoding -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,NONSA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,NSA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-nsa-encoding -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NONSA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1010,NSA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1030,NSA %s
|
||||
|
||||
; GCN-LABEL: {{^}}sample_2d:
|
||||
;
|
||||
|
@ -24,7 +25,8 @@ main_body:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_d_3d:
|
||||
; NSA: image_sample_d v[0:3], [v3, v8, v7, v5, v4, v6, v0, v2, v1],
|
||||
; GFX1010: image_sample_d v[0:3], v[7:22],
|
||||
; GFX1030: image_sample_d v[0:3], [v3, v8, v7, v5, v4, v6, v0, v2, v1],
|
||||
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %r, float %t, float %dsdh, float %dtdv, float %dsdv, float %drdv, float %drdh, float %dtdh) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
|
@ -33,8 +35,8 @@ main_body:
|
|||
|
||||
; GCN-LABEL: {{^}}sample_contig_nsa:
|
||||
; NONSA: image_sample_c_l v5, v[0:4],
|
||||
; NSA: image_sample_c_l v8, v[0:4],
|
||||
; NSA: image_sample v9, [v6, v7, v5],
|
||||
; NSA: image_sample_c_l v{{[0-9]+}}, v[0:4],
|
||||
; NSA: image_sample v{{[0-9]+}}, [v6, v7, v5],
|
||||
define amdgpu_ps <2 x float> @sample_contig_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {
|
||||
main_body:
|
||||
%v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
|
@ -45,8 +47,8 @@ main_body:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_nsa_nsa:
|
||||
; NSA: image_sample_c_l v8, [v1, v2, v3, v4, v0],
|
||||
; NSA: image_sample v9, [v6, v7, v5],
|
||||
; NSA: image_sample_c_l v{{[0-9]+}}, [v1, v2, v3, v4, v0],
|
||||
; NSA: image_sample v{{[0-9]+}}, [v6, v7, v5],
|
||||
define amdgpu_ps <2 x float> @sample_nsa_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %lod, float %zcompare, float %s1, float %t1, float %r1, float %r2, float %s2, float %t2) {
|
||||
main_body:
|
||||
%v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
|
@ -57,8 +59,8 @@ main_body:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_nsa_contig:
|
||||
; NSA: image_sample_c_l v8, [v1, v2, v3, v4, v0],
|
||||
; NSA: image_sample v9, v[5:7],
|
||||
; NSA: image_sample_c_l v{{[0-9]+}}, [v1, v2, v3, v4, v0],
|
||||
; NSA: image_sample v{{[0-9]+}}, v[5:7],
|
||||
define amdgpu_ps <2 x float> @sample_nsa_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %lod, float %zcompare, float %s1, float %t1, float %r1, float %s2, float %t2, float %r2) {
|
||||
main_body:
|
||||
%v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
|
@ -69,10 +71,10 @@ main_body:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_contig_contig:
|
||||
; NSA: image_sample_c_l v8, v[0:4],
|
||||
; NSA: image_sample v9, v[5:7],
|
||||
; NONSA: image_sample_c_l v8, v[0:4],
|
||||
; NONSA: image_sample v9, v[5:7],
|
||||
; NSA: image_sample_c_l v{{[0-9]+}}, v[0:4],
|
||||
; NSA: image_sample v{{[0-9]+}}, v[5:7],
|
||||
; NONSA: image_sample_c_l v{{[0-9]+}}, v[0:4],
|
||||
; NONSA: image_sample v{{[0-9]+}}, v[5:7],
|
||||
define amdgpu_ps <2 x float> @sample_contig_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %s2, float %t2, float %r2) {
|
||||
main_body:
|
||||
%v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
|
|
|
@ -600,14 +600,17 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg
|
|||
;
|
||||
; GFX10-LABEL: sample_d_3d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v6, v9, v6
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v9, v3
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v9, v0
|
||||
; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
|
||||
; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
|
||||
; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
|
||||
; GFX10-NEXT: v_mov_b32_e32 v12, v8
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v5
|
||||
; GFX10-NEXT: v_and_b32_e32 v5, v2, v6
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v2, v3
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v2, v0
|
||||
; GFX10-NEXT: v_lshl_or_b32 v11, v7, 16, v5
|
||||
; GFX10-NEXT: v_lshl_or_b32 v9, v4, 16, v3
|
||||
; GFX10-NEXT: v_lshl_or_b32 v7, v1, 16, v0
|
||||
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -1156,14 +1159,17 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32>
|
|||
;
|
||||
; GFX10-LABEL: sample_c_d_o_2darray_V1:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v6, v9, v6
|
||||
; GFX10-NEXT: v_and_b32_e32 v4, v9, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v2, v9, v2
|
||||
; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
|
||||
; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
||||
; GFX10-NEXT: v_mov_b32_e32 v13, v8
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v1
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v0, v6
|
||||
; GFX10-NEXT: v_and_b32_e32 v4, v0, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v2
|
||||
; GFX10-NEXT: v_lshl_or_b32 v12, v7, 16, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v11, v5, 16, v4
|
||||
; GFX10-NEXT: v_lshl_or_b32 v10, v3, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[8:13], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -1190,14 +1196,17 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4
|
|||
;
|
||||
; GFX10-LABEL: sample_c_d_o_2darray_V2:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v6, v9, v6
|
||||
; GFX10-NEXT: v_and_b32_e32 v4, v9, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v2, v9, v2
|
||||
; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
|
||||
; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
||||
; GFX10-NEXT: v_mov_b32_e32 v13, v8
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v1
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v0, v6
|
||||
; GFX10-NEXT: v_and_b32_e32 v4, v0, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v2
|
||||
; GFX10-NEXT: v_lshl_or_b32 v12, v7, 16, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v11, v5, 16, v4
|
||||
; GFX10-NEXT: v_lshl_or_b32 v10, v3, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
|
|
@ -1739,14 +1739,13 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x
|
|||
; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, 0 ; encoding: [0x80,0x02,0x16,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v1 ; encoding: [0x01,0x03,0x12,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v0 ; encoding: [0x00,0x03,0x14,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v12, v11 ; encoding: [0x0b,0x03,0x18,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, v11 ; encoding: [0x0b,0x03,0x00,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, v12 ; encoding: [0x0c,0x03,0x02,0x7e]
|
||||
; GFX10-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x2c,0x04,0xe9,0xf0,0x0a,0x00,0x40,0x00,0x09,0x02,0x03,0x04,0x05,0x06,0x07,0x08]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v11 ; encoding: [0x0b,0x03,0x12,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v12 ; encoding: [0x0c,0x03,0x14,0x7e]
|
||||
; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x28,0x04,0xe9,0xf0,0x00,0x09,0x40,0x00]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
||||
; GFX10-NEXT: global_store_dword v11, v1, s[12:13] ; encoding: [0x00,0x80,0x70,0xdc,0x0b,0x01,0x0c,0x00]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, v9 ; encoding: [0x09,0x03,0x00,0x7e]
|
||||
; GFX10-NEXT: global_store_dword v11, v10, s[12:13] ; encoding: [0x00,0x80,0x70,0xdc,0x0b,0x0a,0x0c,0x00]
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -1807,14 +1806,14 @@ define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc,
|
|||
;
|
||||
; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, v0 ; encoding: [0x00,0x03,0x16,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v2 ; encoding: [0x02,0x03,0x12,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v1 ; encoding: [0x01,0x03,0x14,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
|
||||
; GFX10-NEXT: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x2c,0x06,0xe9,0xf0,0x0b,0x00,0x40,0x00,0x0a,0x09,0x03,0x04,0x05,0x06,0x07,0x08]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0 ; encoding: [0x80,0x02,0x12,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v9 ; encoding: [0x09,0x03,0x14,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, v9 ; encoding: [0x09,0x03,0x16,0x7e]
|
||||
; GFX10-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x28,0x06,0xe9,0xf0,0x00,0x09,0x40,0x00]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, v9 ; encoding: [0x09,0x03,0x00,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, v10 ; encoding: [0x0a,0x03,0x02,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v11 ; encoding: [0x0b,0x03,0x04,0x7e]
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
%v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
|
||||
|
|
|
@ -47,9 +47,16 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) {
|
||||
; GFX10-LABEL: sample_d_3d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
||||
; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6
|
||||
; GFX10-NEXT: image_sample_d v[0:3], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
|
||||
; GFX10-NEXT: v_mov_b32_e32 v15, v8
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v0
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v6
|
||||
; GFX10-NEXT: v_mov_b32_e32 v13, v5
|
||||
; GFX10-NEXT: v_mov_b32_e32 v12, v4
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v14, v7, 16, v0
|
||||
; GFX10-NEXT: image_sample_d v[0:3], v[8:15], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
@ -132,9 +139,14 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
|
||||
; GFX10-LABEL: sample_d_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
|
||||
; GFX10-NEXT: image_sample_d_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, v6
|
||||
; GFX10-NEXT: v_mov_b32_e32 v6, v0
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v4
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v7, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v10, v5, 16, v0
|
||||
; GFX10-NEXT: image_sample_d_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
@ -177,9 +189,15 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
|
||||
; GFX10-LABEL: sample_c_d_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
||||
; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5
|
||||
; GFX10-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: v_mov_b32_e32 v13, v7
|
||||
; GFX10-NEXT: v_mov_b32_e32 v7, v0
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v5
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, v4
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v12, v6, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
@ -302,9 +320,14 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
|
||||
; GFX10-LABEL: sample_cd_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
|
||||
; GFX10-NEXT: image_sample_cd_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, v6
|
||||
; GFX10-NEXT: v_mov_b32_e32 v6, v0
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v4
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v7, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v10, v5, 16, v0
|
||||
; GFX10-NEXT: image_sample_cd_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
@ -347,9 +370,15 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
|
||||
; GFX10-LABEL: sample_c_cd_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
||||
; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5
|
||||
; GFX10-NEXT: image_sample_c_cd_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: v_mov_b32_e32 v13, v7
|
||||
; GFX10-NEXT: v_mov_b32_e32 v7, v0
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v5
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, v4
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v12, v6, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
@ -371,9 +400,16 @@ main_body:
|
|||
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
|
||||
; GFX10-LABEL: sample_c_d_o_2darray_V1:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
||||
; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6
|
||||
; GFX10-NEXT: image_sample_c_d_o v0, [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
||||
; GFX10-NEXT: v_mov_b32_e32 v15, v8
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v0
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v6
|
||||
; GFX10-NEXT: v_mov_b32_e32 v13, v5
|
||||
; GFX10-NEXT: v_mov_b32_e32 v12, v4
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v14, v7, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
@ -395,9 +431,16 @@ main_body:
|
|||
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
|
||||
; GFX10-LABEL: sample_c_d_o_2darray_V2:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
||||
; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6
|
||||
; GFX10-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
||||
; GFX10-NEXT: v_mov_b32_e32 v15, v8
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v0
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v6
|
||||
; GFX10-NEXT: v_mov_b32_e32 v13, v5
|
||||
; GFX10-NEXT: v_mov_b32_e32 v12, v4
|
||||
; GFX10-NEXT: v_mov_b32_e32 v11, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v14, v7, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
@ -489,26 +532,30 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
|
||||
; GFX10-LABEL: sample_g16_noa16_d_3d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v9, v3
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v9, v0
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
|
||||
; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
|
||||
; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v9, v2, v9
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v2, v0
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v9
|
||||
; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0
|
||||
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10GISEL-LABEL: sample_g16_noa16_d_3d:
|
||||
; GFX10GISEL: ; %bb.0: ; %main_body
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v11, 0xffff
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v9, v1
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v9, s12
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v9, v4
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v3, v5, v9, s12
|
||||
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v3, v9, v11, s12
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v4, v10, v11, v4
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v2, v0, v11, v1
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v11, s12
|
||||
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
|
||||
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10GISEL-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -636,23 +683,28 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
|
||||
; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v8, v3
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v8, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
|
||||
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
|
||||
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v0, v3
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v3
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d:
|
||||
; GFX10GISEL: ; %bb.0: ; %main_body
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v8, v2
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v8, v4
|
||||
; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v8
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v4, v9, v0, v4
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v3, v1, v0, v3
|
||||
; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10GISEL-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -828,23 +880,28 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
|
||||
; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v8, v3
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v8, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
|
||||
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
|
||||
; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v0, v3
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v3
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d:
|
||||
; GFX10GISEL: ; %bb.0: ; %main_body
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v8, v2
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v8, v4
|
||||
; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v8
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v4, v9, v0, v4
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v3, v1, v0, v3
|
||||
; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10GISEL-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -855,23 +912,32 @@ main_body:
|
|||
define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
|
||||
; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v4, v9, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v2, v9, v2
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
|
||||
; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v0, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v9
|
||||
; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
|
||||
; GFX10GISEL: ; %bb.0: ; %main_body
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v2
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v11, v4
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v9, v3
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v3, v4, v9, v5
|
||||
; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v9
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v5, v11, v0, v5
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v4, v10, v0, v1
|
||||
; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10GISEL-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -882,23 +948,32 @@ main_body:
|
|||
define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
|
||||
; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v4, v9, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v2, v9, v2
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
|
||||
; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v0, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v9
|
||||
; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
|
||||
; GFX10GISEL: ; %bb.0: ; %main_body
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v2
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v11, v4
|
||||
; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v9, v3
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v3, v4, v9, v5
|
||||
; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v9
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v5, v11, v0, v5
|
||||
; GFX10GISEL-NEXT: v_and_or_b32 v4, v10, v0, v1
|
||||
; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10GISEL-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
|
|
@ -31,12 +31,14 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
|
||||
; GFX10-LABEL: sample_d_3d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff ; encoding: [0xff,0x02,0x12,0x7e,0xff,0xff,0x00,0x00]
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v9, v3 ; encoding: [0x09,0x07,0x06,0x36]
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v9, v0 ; encoding: [0x09,0x01,0x00,0x36]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; encoding: [0x03,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; encoding: [0x00,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04]
|
||||
; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x15,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00,0x02,0x03,0x05,0x06,0x07,0x08,0x00,0x00]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v3 ; encoding: [0x03,0x03,0x12,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, v2 ; encoding: [0x02,0x03,0x06,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff ; encoding: [0xff,0x02,0x04,0x7e,0xff,0xff,0x00,0x00]
|
||||
; GFX10-NEXT: v_and_b32_e32 v9, v2, v9 ; encoding: [0x02,0x13,0x12,0x36]
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v2, v0 ; encoding: [0x02,0x01,0x00,0x36]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v9 ; encoding: [0x04,0x00,0x6f,0xd7,0x04,0x21,0x25,0x04]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0 ; encoding: [0x02,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04]
|
||||
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x11,0x0f,0x88,0xf0,0x02,0x00,0x40,0x00]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -112,12 +114,14 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
|
||||
; GFX10-LABEL: sample_c_d_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff ; encoding: [0xff,0x02,0x10,0x7e,0xff,0xff,0x00,0x00]
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 ; encoding: [0x08,0x07,0x06,0x36]
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 ; encoding: [0x08,0x03,0x02,0x36]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; encoding: [0x03,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 ; encoding: [0x01,0x00,0x6f,0xd7,0x02,0x21,0x05,0x04]
|
||||
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0d,0x0f,0xac,0xf0,0x00,0x00,0x40,0x00,0x01,0x03,0x05,0x06,0x07,0x00,0x00,0x00]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v2 ; encoding: [0x02,0x03,0x10,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; encoding: [0xff,0x02,0x00,0x7e,0xff,0xff,0x00,0x00]
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v0, v3 ; encoding: [0x00,0x07,0x06,0x36]
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x36]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v3 ; encoding: [0x04,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0 ; encoding: [0x03,0x00,0x6f,0xd7,0x08,0x21,0x01,0x04]
|
||||
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x09,0x0f,0xac,0xf0,0x02,0x00,0x40,0x00]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -220,12 +224,14 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
|
||||
; GFX10-LABEL: sample_c_cd_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff ; encoding: [0xff,0x02,0x10,0x7e,0xff,0xff,0x00,0x00]
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 ; encoding: [0x08,0x07,0x06,0x36]
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 ; encoding: [0x08,0x03,0x02,0x36]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; encoding: [0x03,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 ; encoding: [0x01,0x00,0x6f,0xd7,0x02,0x21,0x05,0x04]
|
||||
; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0d,0x0f,0xac,0xf1,0x00,0x00,0x40,0x00,0x01,0x03,0x05,0x06,0x07,0x00,0x00,0x00]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v2 ; encoding: [0x02,0x03,0x10,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; encoding: [0xff,0x02,0x00,0x7e,0xff,0xff,0x00,0x00]
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v0, v3 ; encoding: [0x00,0x07,0x06,0x36]
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x36]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v3 ; encoding: [0x04,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0 ; encoding: [0x03,0x00,0x6f,0xd7,0x08,0x21,0x01,0x04]
|
||||
; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x09,0x0f,0xac,0xf1,0x02,0x00,0x40,0x00]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -236,12 +242,16 @@ main_body:
|
|||
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
|
||||
; GFX10-LABEL: sample_c_d_o_2darray_V1:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff ; encoding: [0xff,0x02,0x12,0x7e,0xff,0xff,0x00,0x00]
|
||||
; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 ; encoding: [0x09,0x09,0x08,0x36]
|
||||
; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 ; encoding: [0x09,0x05,0x04,0x36]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 ; encoding: [0x04,0x00,0x6f,0xd7,0x05,0x21,0x11,0x04]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; encoding: [0x02,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04]
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2d,0x04,0xe8,0xf0,0x00,0x00,0x40,0x00,0x01,0x02,0x04,0x06,0x07,0x08,0x00,0x00]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v2 ; encoding: [0x02,0x03,0x12,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; encoding: [0xff,0x02,0x00,0x7e,0xff,0xff,0x00,0x00]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v3 ; encoding: [0x03,0x03,0x14,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; encoding: [0x01,0x03,0x06,0x7e]
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v0, v4 ; encoding: [0x00,0x09,0x02,0x36]
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v9 ; encoding: [0x00,0x13,0x00,0x36]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1 ; encoding: [0x05,0x00,0x6f,0xd7,0x05,0x21,0x05,0x04]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0 ; encoding: [0x04,0x00,0x6f,0xd7,0x0a,0x21,0x01,0x04]
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x29,0x04,0xe8,0xf0,0x02,0x00,0x40,0x00]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -252,12 +262,16 @@ main_body:
|
|||
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
|
||||
; GFX10-LABEL: sample_c_d_o_2darray_V2:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff ; encoding: [0xff,0x02,0x12,0x7e,0xff,0xff,0x00,0x00]
|
||||
; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 ; encoding: [0x09,0x09,0x08,0x36]
|
||||
; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 ; encoding: [0x09,0x05,0x04,0x36]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 ; encoding: [0x04,0x00,0x6f,0xd7,0x05,0x21,0x11,0x04]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; encoding: [0x02,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04]
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2d,0x06,0xe8,0xf0,0x00,0x00,0x40,0x00,0x01,0x02,0x04,0x06,0x07,0x08,0x00,0x00]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v2 ; encoding: [0x02,0x03,0x12,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; encoding: [0xff,0x02,0x00,0x7e,0xff,0xff,0x00,0x00]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v3 ; encoding: [0x03,0x03,0x14,0x7e]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; encoding: [0x01,0x03,0x06,0x7e]
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v0, v4 ; encoding: [0x00,0x09,0x02,0x36]
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v9 ; encoding: [0x00,0x13,0x00,0x36]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1 ; encoding: [0x05,0x00,0x6f,0xd7,0x05,0x21,0x05,0x04]
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0 ; encoding: [0x04,0x00,0x6f,0xd7,0x0a,0x21,0x01,0x04]
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x29,0x06,0xe8,0xf0,0x02,0x00,0x40,0x00]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
|
|
@ -31,12 +31,14 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
|
||||
; GFX10-LABEL: sample_d_3d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v9, v3
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v9, v0
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
|
||||
; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
|
||||
; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v9, v2, v9
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v2, v0
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v9
|
||||
; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0
|
||||
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -112,12 +114,14 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
|
||||
; GFX10-LABEL: sample_c_d_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v8, v3
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v8, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
|
||||
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
|
||||
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v0, v3
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v3
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -220,12 +224,14 @@ main_body:
|
|||
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
|
||||
; GFX10-LABEL: sample_c_cd_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v8, v3
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v8, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
|
||||
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
|
||||
; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v0, v3
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v3
|
||||
; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -236,12 +242,16 @@ main_body:
|
|||
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
|
||||
; GFX10-LABEL: sample_c_d_o_2darray_V1:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v4, v9, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v2, v9, v2
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
|
||||
; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v0, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v9
|
||||
; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
@ -252,12 +262,16 @@ main_body:
|
|||
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
|
||||
; GFX10-LABEL: sample_c_d_o_2darray_V2:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
|
||||
; GFX10-NEXT: v_and_b32_e32 v4, v9, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v2, v9, v2
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
|
||||
; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10-NEXT: v_mov_b32_e32 v9, v2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
|
||||
; GFX10-NEXT: v_mov_b32_e32 v10, v3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, v0, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, v0, v9
|
||||
; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1
|
||||
; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0
|
||||
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
|
|
Loading…
Reference in New Issue