forked from OSchip/llvm-project
[AMDGPU] Match v_swap_b32
Differential Revision: https://reviews.llvm.org/D52677 llvm-svn: 345514
This commit is contained in:
parent
61c9de7565
commit
79080ecd82
|
@ -516,6 +516,10 @@ public:
|
|||
return FMA;
|
||||
}
|
||||
|
||||
bool hasSwap() const {
|
||||
return GFX9Insts;
|
||||
}
|
||||
|
||||
/// \returns the HSA trap handler ABI when targeting the AMD HSA OS, and
/// "none" for every other OS.
TrapHandlerAbi getTrapHandlerAbi() const {
  if (isAmdHsaOS())
    return TrapHandlerAbiHsa;
  return TrapHandlerAbiNone;
}
|
||||
|
|
|
@ -212,6 +212,169 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
|
|||
}
|
||||
}
|
||||
|
||||
// This is the same as MachineInstr::readsRegister/modifiesRegister except
|
||||
// it takes subregs into account.
|
||||
static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
|
||||
unsigned Reg, unsigned SubReg,
|
||||
const SIRegisterInfo &TRI) {
|
||||
for (const MachineOperand &MO : R) {
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
|
||||
if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
|
||||
TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
|
||||
if (TRI.regsOverlap(Reg, MO.getReg()))
|
||||
return true;
|
||||
} else if (MO.getReg() == Reg &&
|
||||
TargetRegisterInfo::isVirtualRegister(Reg)) {
|
||||
LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
|
||||
TRI.getSubRegIndexLaneMask(MO.getSubReg());
|
||||
if (Overlap.any())
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool instReadsReg(const MachineInstr *MI,
|
||||
unsigned Reg, unsigned SubReg,
|
||||
const SIRegisterInfo &TRI) {
|
||||
return instAccessReg(MI->uses(), Reg, SubReg, TRI);
|
||||
}
|
||||
|
||||
static bool instModifiesReg(const MachineInstr *MI,
|
||||
unsigned Reg, unsigned SubReg,
|
||||
const SIRegisterInfo &TRI) {
|
||||
return instAccessReg(MI->defs(), Reg, SubReg, TRI);
|
||||
}
|
||||
|
||||
// Produces the (register, subregister index) pair that addresses piece \p I of
// the value held in \p Reg / \p Sub.
//  - A register whose size is exactly 32 bits is returned unchanged.
//  - A wider physical register is replaced by its subregister for channel I.
//  - A wider virtual register keeps its number; the subregister index becomes
//    the one for channel I offset by the position of the lowest set bit in the
//    lane mask of \p Sub.
static TargetInstrInfo::RegSubRegPair
getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
                  const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
  if (TRI.getRegSizeInBits(Reg, MRI) == 32)
    return TargetInstrInfo::RegSubRegPair(Reg, Sub);

  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
    const unsigned Piece = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
    return TargetInstrInfo::RegSubRegPair(Piece, Sub);
  }

  const LaneBitmask Lanes = TRI.getSubRegIndexLaneMask(Sub);
  const auto FirstChannel = countTrailingZeros(Lanes.getAsInteger());
  const unsigned PieceIdx = TRI.getSubRegFromChannel(I + FirstChannel);
  return TargetInstrInfo::RegSubRegPair(Reg, PieceIdx);
}
|
||||
|
||||
// Match:
|
||||
// mov t, x
|
||||
// mov x, y
|
||||
// mov y, t
|
||||
//
|
||||
// =>
|
||||
//
|
||||
// mov t, x (t is potentially dead and move eliminated)
|
||||
// v_swap_b32 x, y
|
||||
//
|
||||
// Returns next valid instruction pointer if was able to create v_swap_b32.
|
||||
//
|
||||
// This shall not be done too early, so as not to prevent possible folding which may
|
||||
// remove matched moves, and this should preferably be done before RA to
|
||||
// release saved registers and also possibly after RA which can insert copies
|
||||
// too.
|
||||
//
|
||||
// This is really just a generic peephole that is not a canonical shrinking,
|
||||
// although requirements match the pass placement and it reduces code size too.
|
||||
// \p MovT is the candidate "mov t, x" (a V_MOV_B32_e32 or a COPY). \p MRI and
// \p TII belong to the function containing MovT.
// Returns the instruction that follows MovT once the swap has been formed, or
// nullptr when no swap was created and nothing was changed.
static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
                               const SIInstrInfo *TII) {
  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
         MovT.getOpcode() == AMDGPU::COPY);

  unsigned T = MovT.getOperand(0).getReg();
  unsigned Tsub = MovT.getOperand(0).getSubReg();
  MachineOperand &Xop = MovT.getOperand(1);

  // A move of an immediate cannot start a swap sequence.
  if (!Xop.isReg())
    return nullptr;
  unsigned X = Xop.getReg();
  unsigned Xsub = Xop.getSubReg();

  // Number of V_SWAP_B32 instructions to emit: one per four units of the
  // operand size reported by getOpSize (presumably bytes, i.e. one per dword).
  unsigned Size = TII->getOpSize(MovT, 0) / 4;

  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  if (!TRI.isVGPR(MRI, X))
    return nullptr;

  // Every use of T with the same subregister is a candidate for "mov y, t".
  for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
    if (YTop.getSubReg() != Tsub)
      continue;

    MachineInstr &MovY = *YTop.getParent();
    if (MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
        MovY.getOpcode() != AMDGPU::COPY)
      continue;

    // The use of T found above may be some other operand of MovY (e.g. an
    // implicit use), so verify that the moved source itself is T.Tsub before
    // querying its subregister.
    const MachineOperand &YSrc = MovY.getOperand(1);
    if (!YSrc.isReg() || YSrc.getReg() != T || YSrc.getSubReg() != Tsub)
      continue;

    unsigned Y = MovY.getOperand(0).getReg();
    unsigned Ysub = MovY.getOperand(0).getSubReg();

    // Both ends of the sequence must live in the same basic block.
    if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
      continue;

    // Scan the instructions strictly between MovT and MovY looking for the
    // single "mov x, y", bailing out on anything that would make the rewrite
    // unsafe.
    MachineInstr *MovX = nullptr;
    auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
    for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
      // X must not be read, Y and T must not be written, and X must not be
      // written again once MovX has been found.
      if (instReadsReg(&*I, X, Xsub, TRI) ||
          instModifiesReg(&*I, Y, Ysub, TRI) ||
          instModifiesReg(&*I, T, Tsub, TRI) ||
          (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
        MovX = nullptr;
        break;
      }

      if (!instReadsReg(&*I, Y, Ysub, TRI)) {
        // An unrelated instruction; it still must not clobber X before MovX.
        if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
          MovX = nullptr;
          break;
        }
        continue;
      }

      // This instruction reads Y: it has to be the one and only "mov x, y".
      if (MovX ||
          (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
           I->getOpcode() != AMDGPU::COPY) ||
          I->getOperand(0).getReg() != X ||
          I->getOperand(0).getSubReg() != Xsub) {
        MovX = nullptr;
        break;
      }
      MovX = &*I;
    }

    // No usable MovX, or MovY was not reached after MovT in this block.
    if (!MovX || I == E)
      continue;

    LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);

    // Emit one swap per 32-bit piece, placed where MovX used to be.
    for (unsigned Piece = 0; Piece < Size; ++Piece) {
      TargetInstrInfo::RegSubRegPair X1, Y1;
      X1 = getSubRegForIndex(X, Xsub, Piece, TRI, MRI);
      Y1 = getSubRegForIndex(Y, Ysub, Piece, TRI, MRI);
      BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
              TII->get(AMDGPU::V_SWAP_B32))
        .addDef(X1.Reg, 0, X1.SubReg)
        .addDef(Y1.Reg, 0, Y1.SubReg)
        .addReg(Y1.Reg, 0, Y1.SubReg)
        .addReg(X1.Reg, 0, X1.SubReg);
    }
    MovX->eraseFromParent();
    MovY.eraseFromParent();

    // Compute the continuation point before MovT itself may be erased.
    MachineInstr *Next = &*std::next(MovT.getIterator());
    if (MRI.use_nodbg_empty(T))
      MovT.eraseFromParent();
    else
      Xop.setIsKill(false); // X is now read again by the swap after MovT.

    return Next;
  }

  return nullptr;
}
|
||||
|
||||
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
|
@ -252,6 +415,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
|
|||
}
|
||||
}
|
||||
|
||||
if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
|
||||
MI.getOpcode() == AMDGPU::COPY)) {
|
||||
if (auto *NextMI = matchSwap(MI, MRI, TII)) {
|
||||
Next = NextMI->getIterator();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Combine adjacent s_nops to use the immediate operand encoding how long
|
||||
// to wait.
|
||||
//
|
||||
|
|
|
@ -0,0 +1,564 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
# GCN-LABEL: name: swap_phys_condensed
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
|
||||
# GCN-NEXT: S_SETPC_B64_return
|
||||
---
|
||||
name: swap_phys_condensed
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_phys_sparse
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
|
||||
# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
|
||||
# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
|
||||
# GCN-NEXT: S_SETPC_B64_return
|
||||
---
|
||||
name: swap_phys_sparse
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
$vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
$vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_phys_liveout
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
|
||||
# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
|
||||
# GCN-NEXT: S_SETPC_B64_return
|
||||
---
|
||||
name: swap_phys_liveout
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr2, implicit $vgpr1
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_phys_b64
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: $vgpr0, $vgpr2 = V_SWAP_B32 $vgpr2, $vgpr0, implicit $exec
|
||||
# GCN-NEXT: $vgpr1, $vgpr3 = V_SWAP_B32 $vgpr3, $vgpr1, implicit $exec
|
||||
---
|
||||
name: swap_phys_b64
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr4_vgpr5 = COPY killed $vgpr0_vgpr1
|
||||
$vgpr0_vgpr1 = COPY killed $vgpr2_vgpr3
|
||||
$vgpr2_vgpr3 = COPY killed $vgpr4_vgpr5
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_phys_overlap_x
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
|
||||
# GCN-NEXT: $vgpr3_vgpr4 = V_ADD_F64 0, $vgpr0_vgpr1, 0, $vgpr3_vgpr4, 0, 0, implicit $exec
|
||||
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
---
|
||||
name: swap_phys_overlap_x
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
|
||||
$vgpr3_vgpr4 = V_ADD_F64 0, $vgpr0_vgpr1, 0, $vgpr3_vgpr4, 0, 0, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_phys_clobber_y
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
|
||||
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
|
||||
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
---
|
||||
name: swap_phys_clobber_y
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
S_ENDPGM
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_copy_condense
|
||||
# GCN: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
|
||||
---
|
||||
name: swap_virt_copy_condense
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = COPY %0
|
||||
%0 = COPY %1
|
||||
%1 = COPY %2
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_copy_sparse
|
||||
# GCN: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
|
||||
---
|
||||
name: swap_virt_copy_sparse
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = COPY %0
|
||||
S_NOP 0
|
||||
%0 = COPY %1
|
||||
S_NOP 0
|
||||
%1 = COPY %2
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_copy_subreg
|
||||
# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
|
||||
---
|
||||
name: swap_virt_copy_subreg
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vreg_64 }
|
||||
- { id: 2, class: vreg_64 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2.sub0 = COPY %0.sub0
|
||||
%2.sub1 = COPY %0.sub1
|
||||
%0.sub0 = COPY %1.sub0
|
||||
%0.sub1 = COPY %1.sub1
|
||||
%1.sub0 = COPY %2.sub0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_mov
|
||||
# GCN: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
|
||||
---
|
||||
name: swap_virt_mov
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = V_MOV_B32_e32 %0, implicit $exec
|
||||
%0 = V_MOV_B32_e32 %1, implicit $exec
|
||||
%1 = V_MOV_B32_e32 %2, implicit $exec
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_read_x
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %2:vgpr_32 = COPY %0
|
||||
# GCN-NEXT: %3:vgpr_32 = COPY %0
|
||||
# GCN-NEXT: %0:vgpr_32 = COPY %1
|
||||
# GCN-NEXT: %1:vgpr_32 = COPY %2
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
|
||||
---
|
||||
name: swap_virt_read_x
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
- { id: 3, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = COPY %0
|
||||
%3 = COPY %0
|
||||
%0 = COPY %1
|
||||
%1 = COPY %2
|
||||
S_ENDPGM
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_read_t_twice
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %2:vgpr_32 = COPY %0
|
||||
# GCN-NEXT: %3:vgpr_32 = COPY %2
|
||||
# GCN-NEXT: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
|
||||
---
|
||||
name: swap_virt_read_t_twice
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
- { id: 3, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = COPY %0
|
||||
%3 = COPY %2
|
||||
%0 = COPY %1
|
||||
%1 = COPY %2
|
||||
S_ENDPGM
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_clobber_y
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %2:vgpr_32 = COPY %0
|
||||
# GCN-NEXT: %0:vgpr_32 = COPY %1
|
||||
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vgpr_32 = COPY %2
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
|
||||
---
|
||||
name: swap_virt_clobber_y
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = COPY %0
|
||||
%0 = COPY %1
|
||||
%1 = IMPLICIT_DEF
|
||||
%1 = COPY %2
|
||||
S_ENDPGM
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_clobber_x1
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %2:vgpr_32 = COPY %0
|
||||
# GCN-NEXT: %0:vgpr_32 = COPY %1
|
||||
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vgpr_32 = COPY %2
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
|
||||
---
|
||||
name: swap_virt_clobber_x1
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = COPY %0
|
||||
%0 = COPY %1
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = COPY %2
|
||||
S_ENDPGM
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_clobber_x2
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %2:vgpr_32 = COPY %0
|
||||
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %0:vgpr_32 = COPY %1
|
||||
# GCN-NEXT: %1:vgpr_32 = COPY %2
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
|
||||
---
|
||||
name: swap_virt_clobber_x2
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = COPY %0
|
||||
%0 = IMPLICIT_DEF
|
||||
%0 = COPY %1
|
||||
%1 = COPY %2
|
||||
S_ENDPGM
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_clobber_t
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %2:vgpr_32 = COPY %0
|
||||
# GCN-NEXT: %0:vgpr_32 = COPY %1
|
||||
# GCN-NEXT: %2:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vgpr_32 = COPY %2
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
|
||||
---
|
||||
name: swap_virt_clobber_t
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = COPY %0
|
||||
%0 = COPY %1
|
||||
%2 = IMPLICIT_DEF
|
||||
%1 = COPY %2
|
||||
S_ENDPGM
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_copy_subreg_overlap_x_full
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0
|
||||
# GCN-NEXT: %3:vreg_64 = COPY %0
|
||||
# GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0
|
||||
# GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0
|
||||
---
|
||||
name: swap_virt_copy_subreg_overlap_x_full
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vreg_64 }
|
||||
- { id: 2, class: vreg_64 }
|
||||
- { id: 3, class: vreg_64 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2.sub0 = COPY %0.sub0
|
||||
%3 = COPY %0
|
||||
%0.sub0 = COPY %1.sub0
|
||||
%1.sub0 = COPY %2.sub0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_copy_subreg_overlap_x_part
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0
|
||||
# GCN-NEXT: %3:vreg_64 = COPY %0.sub0_sub1
|
||||
# GCN-NEXT: %0.sub0:vreg_128 = COPY %1.sub0
|
||||
# GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0
|
||||
---
|
||||
name: swap_virt_copy_subreg_overlap_x_part
|
||||
registers:
|
||||
- { id: 0, class: vreg_128 }
|
||||
- { id: 1, class: vreg_64 }
|
||||
- { id: 2, class: vreg_64 }
|
||||
- { id: 3, class: vreg_64 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2.sub0 = COPY %0.sub0
|
||||
%3 = COPY %0.sub0_sub1
|
||||
%0.sub0 = COPY %1.sub0
|
||||
%1.sub0 = COPY %2.sub0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_copy_subreg_wide_y
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0
|
||||
# GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0
|
||||
# GCN-NEXT: %1:vreg_64 = COPY %2
|
||||
---
|
||||
name: swap_virt_copy_subreg_wide_y
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vreg_64 }
|
||||
- { id: 2, class: vreg_64 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2.sub0 = COPY %0.sub0
|
||||
%0.sub0 = COPY %1.sub0
|
||||
%1 = COPY %2
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_b64
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
|
||||
# GCN-NEXT: %0.sub1:vreg_64, %1.sub1:vreg_64 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec
|
||||
---
|
||||
name: swap_virt_b64
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vreg_64 }
|
||||
- { id: 2, class: vreg_64 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = COPY %0
|
||||
%0 = COPY %1
|
||||
%1 = COPY %2
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_b128
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %0.sub0:vreg_128, %1.sub0:vreg_128 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
|
||||
# GCN-NEXT: %0.sub1:vreg_128, %1.sub1:vreg_128 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec
|
||||
# GCN-NEXT: %0.sub2:vreg_128, %1.sub2:vreg_128 = V_SWAP_B32 %1.sub2, %0.sub2, implicit $exec
|
||||
# GCN-NEXT: %0.sub3:vreg_128, %1.sub3:vreg_128 = V_SWAP_B32 %1.sub3, %0.sub3, implicit $exec
|
||||
---
|
||||
name: swap_virt_b128
|
||||
registers:
|
||||
- { id: 0, class: vreg_128 }
|
||||
- { id: 1, class: vreg_128 }
|
||||
- { id: 2, class: vreg_128 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = COPY %0
|
||||
%0 = COPY %1
|
||||
%1 = COPY %2
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_b128_sub0_1
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %0.sub0:vreg_128, %1.sub0:vreg_128 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
|
||||
# GCN-NEXT: %0.sub1:vreg_128, %1.sub1:vreg_128 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
---
|
||||
name: swap_virt_b128_sub0_1
|
||||
registers:
|
||||
- { id: 0, class: vreg_128 }
|
||||
- { id: 1, class: vreg_128 }
|
||||
- { id: 2, class: vreg_128 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2.sub0_sub1 = COPY %0.sub0_sub1
|
||||
%0.sub0_sub1 = COPY %1.sub0_sub1
|
||||
%1.sub0_sub1 = COPY %2.sub0_sub1
|
||||
S_ENDPGM
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_b128_sub2_3
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %0.sub2:vreg_128, %1.sub2:vreg_128 = V_SWAP_B32 %1.sub2, %0.sub2, implicit $exec
|
||||
# GCN-NEXT: %0.sub3:vreg_128, %1.sub3:vreg_128 = V_SWAP_B32 %1.sub3, %0.sub3, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
---
|
||||
name: swap_virt_b128_sub2_3
|
||||
registers:
|
||||
- { id: 0, class: vreg_128 }
|
||||
- { id: 1, class: vreg_128 }
|
||||
- { id: 2, class: vreg_128 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2.sub2_sub3 = COPY %0.sub2_sub3
|
||||
%0.sub2_sub3 = COPY %1.sub2_sub3
|
||||
%1.sub2_sub3 = COPY %2.sub2_sub3
|
||||
S_ENDPGM
|
||||
...
|
||||
|
||||
|
||||
# GCN-LABEL: name: swap_virt_s_to_s
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:sgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:sgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %2:sgpr_32 = COPY %0
|
||||
# GCN-NEXT: %0:sgpr_32 = COPY %1
|
||||
# GCN-NEXT: %1:sgpr_32 = COPY %2
|
||||
---
|
||||
name: swap_virt_s_to_s
|
||||
registers:
|
||||
- { id: 0, class: sgpr_32 }
|
||||
- { id: 1, class: sgpr_32 }
|
||||
- { id: 2, class: sgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = COPY %0
|
||||
%0 = COPY %1
|
||||
%1 = COPY %2
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_copy_subreg_impdef_super
|
||||
# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
|
||||
---
|
||||
name: swap_virt_copy_subreg_impdef_super
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vreg_64 }
|
||||
- { id: 2, class: vreg_64 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2.sub0 = COPY %0.sub0, implicit-def %2, implicit $exec
|
||||
%2.sub1 = COPY %0.sub1
|
||||
%0.sub0 = COPY %1.sub0
|
||||
%0.sub1 = COPY %1.sub1
|
||||
%1.sub0 = COPY %2.sub0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_copy_subreg_impuse_x
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0
|
||||
# GCN-NEXT: %2.sub1:vreg_64 = COPY %0.sub1
|
||||
# GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0, implicit %0
|
||||
# GCN-NEXT: %0.sub1:vreg_64 = COPY %1.sub1
|
||||
# GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
---
|
||||
name: swap_virt_copy_subreg_impuse_x
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vreg_64 }
|
||||
- { id: 2, class: vreg_64 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2.sub0 = COPY %0.sub0
|
||||
%2.sub1 = COPY %0.sub1
|
||||
%0.sub0 = COPY %1.sub0, implicit %0
|
||||
%0.sub1 = COPY %1.sub1
|
||||
%1.sub0 = COPY %2.sub0
|
||||
S_ENDPGM
|
||||
...
|
Loading…
Reference in New Issue