AMDGPU/GlobalISel: Fold constant offset vector extract indexes
Handle dynamic vector extracts whose index is an add of a constant offset by folding the offset into the subregister of the indexing operation, so only the base register is dynamically indexed. In regbankselect, force the add back into the waterfall loop, where it will be recognized when selected.
parent 9dc9f7ca14
commit e3d352c541
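Before the diff itself, a sketch of the shape being optimized. The tests below compute an extract index as an add of a constant (e.g. "%add = add i32 %sel, 3") and then dynamically index a vector with it; a minimal scalar C++ model of that shape follows (the function name and the bounds mask are inventions of this sketch, not part of the commit):

#include <cstdint>

// Toy model of the pattern the commit folds: the extract index is a base
// value plus a compile-time constant. After the fold, the constant part is
// absorbed into the subregister choice and only 'sel' stays dynamic.
uint64_t extractAtSelPlus3(const uint64_t vec[8], uint32_t sel) {
  uint32_t idx = sel + 3; // add of a constant offset, as in the tests below
  return vec[idx & 7];    // mask only keeps this toy example in bounds
}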
@@ -1792,6 +1792,30 @@ bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
  return true;
}

/// Return the register to use for the index value, and the subregister to use
/// for the indirectly accessed register.
static std::pair<Register, unsigned>
computeIndirectRegIndex(MachineRegisterInfo &MRI,
                        const SIRegisterInfo &TRI,
                        const TargetRegisterClass *SuperRC,
                        Register IdxReg,
                        unsigned EltSize) {
  Register IdxBaseReg;
  int Offset;
  MachineInstr *Unused;

  std::tie(IdxBaseReg, Offset, Unused)
    = AMDGPU::getBaseWithConstantOffset(MRI, IdxReg);

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SuperRC, EltSize);

  // Skip out of bounds offsets, or else we would end up using an undefined
  // register.
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::make_pair(IdxReg, SubRegs[0]);
  return std::make_pair(IdxBaseReg, SubRegs[Offset]);
}

bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
  MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

@@ -1823,7 +1847,9 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
  const DebugLoc &DL = MI.getDebugLoc();
  const bool Is64 = DstTy.getSizeInBits() == 64;

  unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
  unsigned SubReg;
  std::tie(IdxReg, SubReg) = computeIndirectRegIndex(*MRI, TRI, SrcRC, IdxReg,
                                                     DstTy.getSizeInBits() / 8);

  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    if (DstTy.getSizeInBits() != 32 && !Is64)
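As a hedged illustration of what computeIndirectRegIndex above computes, here is a standalone sketch with toy types; FoldResult, foldConstantOffset, and the plain integer register ids are inventions of this example, while the real code works on Register values via getBaseWithConstantOffset and getRegSplitParts:

#include <cstdio>

// Toy stand-ins for the real structures: an index register is just an id,
// and the super-register splits into NumSubRegs equally sized parts.
struct FoldResult {
  int BaseReg;   // register to use for the runtime index
  int SubRegIdx; // subregister part selected by the constant offset
};

// Mirrors the shape of computeIndirectRegIndex: if the index decomposes as
// Base + Offset and Offset names a valid subregister part, index on Base and
// fold Offset into the subregister; otherwise fall back to the original
// register and part 0, as in the out-of-bounds guard above.
FoldResult foldConstantOffset(int IdxReg, int Base, int Offset,
                              int NumSubRegs) {
  if (Offset < 0 || Offset >= NumSubRegs)
    return {IdxReg, 0};
  return {Base, Offset};
}

int main() {
  // e.g. eight 64-bit elements, index = %sel + 2: part 2 is selected.
  FoldResult R = foldConstantOffset(/*IdxReg=*/10, /*Base=*/9, /*Offset=*/2,
                                    /*NumSubRegs=*/8);
  std::printf("index on reg %d, subreg part %d\n", R.BaseReg, R.SubRegIdx);
}

In the s64 MIR tests further down, part 2 of a 512-bit tuple corresponds to sub4_sub5, which is exactly what the updated checks expect.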
@@ -69,6 +69,8 @@

//===----------------------------------------------------------------------===//

#include "AMDGPURegisterBankInfo.h"

#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

@@ -76,8 +78,8 @@
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

@@ -1975,7 +1977,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(

    assert(OpdMapper.getVRegs(1).empty() && OpdMapper.getVRegs(2).empty());

    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();

    const LLT S32 = LLT::scalar(32);
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    MachineIRBuilder B(MI);

    const ValueMapping &DstMapping

@@ -1983,10 +1991,40 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
    const RegisterBank *DstBank = DstMapping.BreakDown[0].RegBank;
    const RegisterBank *SrcBank =
      OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
    const RegisterBank *IdxBank =
      OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    Register IdxReg = MI.getOperand(2).getReg();
    Register BaseIdxReg;
    unsigned ConstOffset;
    MachineInstr *OffsetDef;
    std::tie(BaseIdxReg, ConstOffset, OffsetDef) =
      AMDGPU::getBaseWithConstantOffset(MRI, MI.getOperand(2).getReg());

    // See if the index is an add of a constant which will be foldable by moving
    // the base register of the index later if this is going to be executed in a
    // waterfall loop. This is essentially to reassociate the add of a constant
    // with the readfirstlane.
    bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
                                   ConstOffset > 0 &&
                                   ConstOffset < SrcTy.getNumElements();

    // Re-insert the constant offset add inside the waterfall loop.
    auto ReinsertIndexAdd = [=, &B, &MRI](MachineInstr &IdxUseInstr,
                                          unsigned OpIdx) {
      Register WaterfallIdx = IdxUseInstr.getOperand(OpIdx).getReg();
      B.setInsertPt(*IdxUseInstr.getParent(), IdxUseInstr.getIterator());

      auto MaterializedOffset = B.buildConstant(S32, ConstOffset);

      auto Add = B.buildAdd(S32, WaterfallIdx, MaterializedOffset);
      MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
      MRI.setRegBank(Add.getReg(0), AMDGPU::SGPRRegBank);
      IdxUseInstr.getOperand(OpIdx).setReg(Add.getReg(0));
    };

    // Move the base register. We'll re-insert the add later.
    if (ShouldMoveIndexIntoLoop)
      MI.getOperand(2).setReg(BaseIdxReg);

    // If this is a VGPR result only because the index was a VGPR result, the
    // actual indexing will be done on the SGPR source vector, which will

@@ -2010,13 +2048,14 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
        buildVCopy(B, DstReg, TmpReg);
      }

      if (ShouldMoveIndexIntoLoop)
        ReinsertIndexAdd(MI, 2);

      return;
    }

    assert(DstTy.getSizeInBits() == 64);

    LLT SrcTy = MRI.getType(SrcReg);
    const LLT S32 = LLT::scalar(32);
    LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32);

    auto CastSrc = B.buildBitcast(Vec32, SrcReg);

@@ -2029,7 +2068,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
    MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB());

    // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1).
    auto IdxLo = B.buildShl(S32, IdxReg, One);
    auto IdxLo = B.buildShl(S32, BaseIdxReg, One);
    auto IdxHi = B.buildAdd(S32, IdxLo, One);

    auto Extract0 = B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);

@@ -2070,6 +2109,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
      buildVCopy(B, DstRegs[1], TmpReg1);
    }

    if (ShouldMoveIndexIntoLoop)
      ReinsertIndexAdd(*IdxLo, 1);

    return;
  }
  case AMDGPU::G_INSERT_VECTOR_ELT: {
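The reassociation comment in applyMappingImpl above relies on the fact that adding a uniform constant commutes with v_readfirstlane: readfirstlane(x + c) == readfirstlane(x) + c when c is the same in every lane, which is why the add can be re-materialized inside the waterfall loop after the readfirstlane of the base index. A minimal self-contained check of that property, with readfirstlane modeled as taking lane 0 of a vector of per-lane values (this lane model is an assumption of the sketch, not LLVM code):

#include <cassert>
#include <cstdint>
#include <vector>

// Toy readfirstlane: return the value held by the first lane.
static uint32_t readFirstLane(const std::vector<uint32_t> &Lanes) {
  return Lanes.front();
}

int main() {
  std::vector<uint32_t> Idx = {5, 7, 7, 5}; // divergent index, one per lane
  const uint32_t C = 3;                     // uniform constant offset

  std::vector<uint32_t> IdxPlusC = Idx;
  for (uint32_t &V : IdxPlusC)
    V += C; // every lane adds the same constant

  // Adding a uniform constant commutes with taking the first lane, so the
  // scalar add can be done after the readfirstlane inside the loop.
  assert(readFirstLane(IdxPlusC) == readFirstLane(Idx) + C);
  return 0;
}

The effect is visible in the test diffs below: the v_readfirstlane now operates on the original index register, and a scalar s_add_u32 (or a folded subregister) appears inside the loop instead of a vector add before it.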
@@ -257,15 +257,10 @@ define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; GPRIDX-NEXT: s_mov_b64 s[20:21], exec
; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s22, v0
; GPRIDX-NEXT: s_lshl_b32 m0, s22, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0
; GPRIDX-NEXT: s_lshl_b32 s22, s22, 1
; GPRIDX-NEXT: s_add_u32 s23, s22, 1
; GPRIDX-NEXT: s_mov_b32 m0, s22
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movrels_b32 s22, s4
; GPRIDX-NEXT: s_mov_b32 m0, s23
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movrels_b32 s23, s4
; GPRIDX-NEXT: s_movrels_b32 s23, s5
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB6_1

@@ -289,13 +284,10 @@ define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; MOVREL-NEXT: s_mov_b64 s[20:21], exec
; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s22, v0
; MOVREL-NEXT: s_lshl_b32 m0, s22, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0
; MOVREL-NEXT: s_lshl_b32 s22, s22, 1
; MOVREL-NEXT: s_add_u32 s23, s22, 1
; MOVREL-NEXT: s_mov_b32 m0, s22
; MOVREL-NEXT: s_movrels_b32 s22, s4
; MOVREL-NEXT: s_mov_b32 m0, s23
; MOVREL-NEXT: s_movrels_b32 s23, s4
; MOVREL-NEXT: s_movrels_b32 s23, s5
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB6_1

@@ -371,15 +363,11 @@ define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
; GPRIDX-NEXT: s_mov_b64 s[16:17], exec
; GPRIDX-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s18, v0
; GPRIDX-NEXT: s_lshl_b32 m0, s18, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0
; GPRIDX-NEXT: s_lshl_b32 s18, s18, 1
; GPRIDX-NEXT: s_add_u32 s19, s18, 1
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movrels_b32 s18, s0
; GPRIDX-NEXT: s_mov_b32 m0, s19
; GPRIDX-NEXT: s_movrels_b32 s19, s1
; GPRIDX-NEXT: v_mov_b32_e32 v1, s18
; GPRIDX-NEXT: s_movrels_b32 s19, s0
; GPRIDX-NEXT: v_mov_b32_e32 v2, s19
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc

@@ -410,13 +398,10 @@ define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
; MOVREL-NEXT: s_mov_b64 s[16:17], exec
; MOVREL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s18, v0
; MOVREL-NEXT: s_lshl_b32 m0, s18, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0
; MOVREL-NEXT: s_lshl_b32 s18, s18, 1
; MOVREL-NEXT: s_add_u32 s19, s18, 1
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_movrels_b32 s18, s0
; MOVREL-NEXT: s_mov_b32 m0, s19
; MOVREL-NEXT: s_movrels_b32 s19, s0
; MOVREL-NEXT: s_movrels_b32 s19, s1
; MOVREL-NEXT: v_mov_b32_e32 v1, s18
; MOVREL-NEXT: v_mov_b32_e32 v2, s19
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc

@@ -439,14 +424,13 @@ define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1
; GPRIDX-NEXT: s_add_u32 s7, s6, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v18, v0
; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc

@@ -463,13 +447,10 @@ define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; MOVREL-NEXT: s_lshl_b32 s6, s6, 1
; MOVREL-NEXT: s_mov_b32 m0, s6
; MOVREL-NEXT: s_add_u32 s7, s6, 1
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
; MOVREL-NEXT: s_mov_b32 m0, s7
; MOVREL-NEXT: v_movrels_b32_e32 v18, v0
; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB9_1
@@ -487,24 +468,20 @@ define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT: s_add_u32 s1, s0, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s1, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_lshl_b32 s0, s2, 1
; MOVREL-NEXT: s_mov_b32 m0, s0
; MOVREL-NEXT: s_add_u32 s0, s0, 1
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
; MOVREL-NEXT: v_movrels_b32_e32 v16, v0
; MOVREL-NEXT: s_mov_b32 m0, s0
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
; MOVREL-NEXT: v_movrels_b32_e32 v17, v1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17]
; MOVREL-NEXT: s_endpgm
entry:

@@ -573,30 +550,30 @@ define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i3
; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_add_u32 m0, s10, 3
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 m0, s10
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_movrels_b32 s0, s0
; GPRIDX-NEXT: s_movrels_b32 s0, s3
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_add_u32 m0, s10, 3
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 m0, s10
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_movrels_b32 s0, s0
; MOVREL-NEXT: s_movrels_b32 s0, s3
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: ; return to shader part epilog
entry:

@@ -609,38 +586,36 @@ define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: v_add_u32_e32 v9, 3, v8
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v9
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v9
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v8, v0
; GPRIDX-NEXT: v_mov_b32_e32 v9, v3
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB13_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: v_add_u32_e32 v9, vcc, 3, v8
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v9
; MOVREL-NEXT: v_readfirstlane_b32 s6, v8
; MOVREL-NEXT: s_mov_b32 m0, s6
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v9
; MOVREL-NEXT: v_movrels_b32_e32 v8, v0
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; MOVREL-NEXT: v_movrels_b32_e32 v9, v3
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB13_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v8
; MOVREL-NEXT: v_mov_b32_e32 v0, v9
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
  %add = add i32 %sel, 3
@@ -653,9 +628,9 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_add_u32 m0, s18, 1
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8

@@ -668,16 +643,16 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[2:3]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_add_u32 m0, s18, 1
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8

@@ -690,7 +665,7 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[2:3]
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %add = add i32 %sel, 1

@@ -703,11 +678,11 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_add_u32 m0, s18, 2
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10

@@ -718,18 +693,18 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset2:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_add_u32 m0, s18, 2
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10

@@ -740,7 +715,7 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %add = add i32 %sel, 2

@@ -753,13 +728,13 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_add_u32 m0, s18, 3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12

@@ -768,20 +743,20 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[6:7]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_add_u32 m0, s18, 3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12

@@ -790,7 +765,7 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[6:7]
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %add = add i32 %sel, 3

@@ -803,7 +778,6 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_add_u32 m0, s18, 4
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6

@@ -812,20 +786,20 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[8:9]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset4:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_add_u32 m0, s18, 4
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6

@@ -834,13 +808,14 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[8:9]
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %add = add i32 %sel, 4

@@ -853,7 +828,6 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_add_u32 m0, s18, 5
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6

@@ -864,18 +838,18 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[10:11]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset5:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_add_u32 m0, s18, 5
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6

@@ -886,11 +860,12 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[10:11]
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %add = add i32 %sel, 5
@@ -903,7 +878,6 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_add_u32 m0, s18, 6
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6

@@ -916,16 +890,16 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[12:13]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset6:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_add_u32 m0, s18, 6
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6

@@ -938,9 +912,10 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[12:13]
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %add = add i32 %sel, 6

@@ -953,7 +928,6 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_add_u32 m0, s18, 7
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6

@@ -968,14 +942,15 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_add_u32 m0, s18, 7
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6

@@ -990,7 +965,8 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15]
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %add = add i32 %sel, 7

@@ -1052,49 +1028,45 @@ define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: v_add_u32_e32 v18, 3, v16
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v18
; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1
; GPRIDX-NEXT: s_add_u32 s7, s6, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
; GPRIDX-NEXT: s_add_u32 s7, s6, 3
; GPRIDX-NEXT: s_lshl_b32 s7, s7, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB22_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v16
; GPRIDX-NEXT: v_mov_b32_e32 v1, v17
; GPRIDX-NEXT: v_mov_b32_e32 v0, v17
; GPRIDX-NEXT: v_mov_b32_e32 v1, v18
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: v_add_u32_e32 v18, vcc, 3, v16
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v18
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v18
; MOVREL-NEXT: s_lshl_b32 s6, s6, 1
; MOVREL-NEXT: s_mov_b32 m0, s6
; MOVREL-NEXT: s_add_u32 s7, s6, 1
; MOVREL-NEXT: v_movrels_b32_e32 v16, v0
; MOVREL-NEXT: s_mov_b32 m0, s7
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; MOVREL-NEXT: s_add_u32 s6, s6, 3
; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB22_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v16
; MOVREL-NEXT: v_mov_b32_e32 v1, v17
; MOVREL-NEXT: v_mov_b32_e32 v0, v17
; MOVREL-NEXT: v_mov_b32_e32 v1, v18
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
  %add = add i32 %sel, 3

@@ -1188,14 +1160,13 @@ define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1
; GPRIDX-NEXT: s_add_u32 s7, s6, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v18, v0
; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc

@@ -1212,13 +1183,10 @@ define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; MOVREL-NEXT: s_lshl_b32 s6, s6, 1
; MOVREL-NEXT: s_mov_b32 m0, s6
; MOVREL-NEXT: s_add_u32 s7, s6, 1
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
; MOVREL-NEXT: s_mov_b32 m0, s7
; MOVREL-NEXT: v_movrels_b32_e32 v18, v0
; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB25_1
@@ -286,18 +286,14 @@ body: |
; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1
; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
; MOVREL: $m0 = COPY [[COPY1]]
; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]]
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1
; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
; GPRIDX: $m0 = COPY [[COPY1]]
; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]]
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:sgpr(s32) = COPY $sgpr8

@@ -352,18 +348,14 @@ body: |
; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7
; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
; MOVREL: $m0 = COPY [[COPY1]]
; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]]
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7
; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
; GPRIDX: $m0 = COPY [[COPY1]]
; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]]
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:sgpr(s32) = COPY $sgpr8

@@ -418,18 +410,14 @@ body: |
; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1
; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
; MOVREL: $m0 = COPY [[COPY1]]
; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]]
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1
; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
; GPRIDX: $m0 = COPY [[COPY1]]
; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]]
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
%0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:sgpr(s32) = COPY $sgpr8

@@ -451,18 +439,14 @@ body: |
; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2
; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
; MOVREL: $m0 = COPY [[COPY1]]
; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]]
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2
; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
; GPRIDX: $m0 = COPY [[COPY1]]
; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]]
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
%0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:sgpr(s32) = COPY $sgpr8

@@ -685,18 +669,14 @@ body: |
; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1
; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
; MOVREL: $m0 = COPY [[COPY1]]
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub1, implicit $m0, implicit $exec, implicit [[COPY]]
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub1, implicit $exec, implicit [[COPY]], implicit $m0
; GPRIDX: S_SET_GPR_IDX_OFF
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
%0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7

@@ -753,18 +733,14 @@ body: |
; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7
; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
; MOVREL: $m0 = COPY [[COPY1]]
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub7, implicit $m0, implicit $exec, implicit [[COPY]]
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub7, implicit $exec, implicit [[COPY]], implicit $m0
; GPRIDX: S_SET_GPR_IDX_OFF
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
%0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
@ -436,10 +436,10 @@ body: |
|
|||
; WAVE64: successors: %bb.1(0x80000000)
|
||||
; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
|
||||
; WAVE64: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
|
||||
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
|
||||
; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
|
||||
; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
|
||||
; WAVE64: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
|
||||
; WAVE64: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
|
||||
; WAVE64: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
|
||||
; WAVE64: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
|
@ -447,9 +447,11 @@ body: |
|
|||
; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1
|
||||
; WAVE64: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %4(s32), %bb.1
|
||||
; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
|
||||
; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
|
||||
; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
||||
; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
|
||||
; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C1]]
|
||||
; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[ADD1]](s32)
|
||||
; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
|
@ -462,10 +464,10 @@ body: |
|
|||
; WAVE32: successors: %bb.1(0x80000000)
|
||||
; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
|
||||
; WAVE32: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
|
||||
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
|
||||
; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
|
||||
; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
|
||||
; WAVE32: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
|
||||
; WAVE32: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
|
||||
; WAVE32: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
|
||||
; WAVE32: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
|
||||
; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
|
||||
|
@ -473,9 +475,11 @@ body: |
|
|||
; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1
|
||||
; WAVE32: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %4(s32), %bb.1
|
||||
; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
|
||||
; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
|
||||
; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
||||
; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
|
||||
; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C1]]
|
||||
; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[ADD1]](s32)
|
||||
; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
|
||||
; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
|
@ -643,10 +647,10 @@ body: |
|
|||
; WAVE64: successors: %bb.1(0x80000000)
|
||||
; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
|
||||
; WAVE64: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
|
||||
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
|
||||
; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
|
||||
; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
|
||||
; WAVE64: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
|
||||
; WAVE64: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
|
||||
; WAVE64: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
|
||||
; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
|
||||
; WAVE64: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
|
||||
|
@ -662,12 +666,14 @@ body: |
|
|||
; WAVE64: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %11(s32), %bb.1
|
||||
; WAVE64: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %6(s32), %bb.1
|
||||
; WAVE64: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %7(s32), %bb.1
|
||||
; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
|
||||
; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
|
||||
; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C1]](s32)
|
||||
; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
|
||||
; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
||||
; WAVE64: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
|
||||
; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C2]]
|
||||
; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ADD1]], [[C1]](s32)
|
||||
; WAVE64: [[ADD2:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
|
||||
; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
|
||||
; WAVE64: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD1]](s32)
|
||||
; WAVE64: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD2]](s32)
|
||||
; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
|
@ -681,10 +687,10 @@ body: |
|
|||
; WAVE32: successors: %bb.1(0x80000000)
|
||||
; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
|
||||
; WAVE32: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
|
||||
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
|
||||
; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
|
||||
; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
|
||||
; WAVE32: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
|
||||
; WAVE32: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
|
||||
; WAVE32: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
|
||||
; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
|
||||
; WAVE32: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
|
||||
|
@ -700,12 +706,14 @@ body: |
|
|||
; WAVE32: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %11(s32), %bb.1
|
||||
; WAVE32: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %6(s32), %bb.1
|
||||
; WAVE32: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %7(s32), %bb.1
|
||||
; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
|
||||
; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
|
||||
; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C1]](s32)
|
||||
; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
|
||||
; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
||||
; WAVE32: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
|
||||
; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C2]]
|
||||
; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ADD1]], [[C1]](s32)
|
||||
; WAVE32: [[ADD2:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
|
||||
; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
|
||||
; WAVE32: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD1]](s32)
|
||||
; WAVE32: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD2]](s32)
|
||||
; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
|
||||
; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
|
@@ -736,10 +744,10 @@ body: |
; WAVE64: successors: %bb.1(0x80000000)
; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
; WAVE64: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; WAVE64: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE64: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE64: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; WAVE64: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -747,9 +755,11 @@ body: |
; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1
; WAVE64: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %4(s32), %bb.1
; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
; WAVE64: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C1]]
; WAVE64: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[ADD1]](s32)
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec
; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@@ -763,10 +773,10 @@ body: |
; WAVE32: successors: %bb.1(0x80000000)
; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
; WAVE32: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; WAVE32: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE32: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE32: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; WAVE32: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
@@ -774,9 +784,11 @@ body: |
; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1
; WAVE32: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %4(s32), %bb.1
; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
; WAVE32: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C1]]
; WAVE32: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[ADD1]](s32)
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec
; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
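For 32-bit elements the rebuild inside the loop is just a scalar add: the readfirstlane result plus the folded constant, kept on the SGPR bank so the G_EXTRACT_VECTOR_ELT index stays uniform. A hedged MachineIRBuilder sketch of that one step (the helper name is ours; this is not the in-tree code):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Given the uniform base index produced by V_READFIRSTLANE inside the
// waterfall loop, re-apply the constant offset that was peeled off the
// original divergent index. Both the constant and the add stay scalar,
// matching the [[C1]]/[[ADD1]] (or [[C2]]/[[ADD1]]) pattern in the checks.
static Register rebuildScalarIndex(MachineIRBuilder &B, Register UniformBase,
                                   int64_t Offset) {
  const LLT S32 = LLT::scalar(32);
  if (Offset == 0)
    return UniformBase;                    // nothing was folded off the index
  auto Cst = B.buildConstant(S32, Offset); // G_CONSTANT i32 Offset
  return B.buildAdd(S32, UniformBase, Cst).getReg(0); // G_ADD base, offset
}

In the checks, this add appears after the readfirstlane of the base index rather than before the loop, which is the observable effect of forcing the add into the loop during regbankselect.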
@@ -807,10 +819,10 @@ body: |
; WAVE64: successors: %bb.1(0x80000000)
; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
; WAVE64: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; WAVE64: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE64: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE64: [[BITCAST:%[0-9]+]]:sgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE64: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
@@ -826,12 +838,14 @@ body: |
; WAVE64: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %11(s32), %bb.1
; WAVE64: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %6(s32), %bb.1
; WAVE64: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %7(s32), %bb.1
; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C1]](s32)
; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
; WAVE64: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C2]]
; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ADD1]], [[C1]](s32)
; WAVE64: [[ADD2:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
; WAVE64: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
; WAVE64: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD1]](s32)
; WAVE64: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD2]](s32)
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec
; WAVE64: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC1]](s32), implicit $exec
; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
@@ -847,10 +861,10 @@ body: |
; WAVE32: successors: %bb.1(0x80000000)
; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
; WAVE32: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; WAVE32: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE32: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE32: [[BITCAST:%[0-9]+]]:sgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE32: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
@@ -866,12 +880,14 @@ body: |
; WAVE32: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %11(s32), %bb.1
; WAVE32: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %6(s32), %bb.1
; WAVE32: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %7(s32), %bb.1
; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C1]](s32)
; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
; WAVE32: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C2]]
; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ADD1]], [[C1]](s32)
; WAVE32: [[ADD2:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
; WAVE32: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
; WAVE32: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD1]](s32)
; WAVE32: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD2]](s32)
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec
; WAVE32: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC1]](s32), implicit $exec
; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec