[AMDGPU] Match v_swap_b32

Differential Revision: https://reviews.llvm.org/D52677

llvm-svn: 345514
This commit is contained in:
Stanislav Mekhanoshin 2018-10-29 17:26:01 +00:00
parent 61c9de7565
commit 79080ecd82
3 changed files with 739 additions and 0 deletions

View File

@ -516,6 +516,10 @@ public:
return FMA;
}
// Subtarget query consulted by the shrink-instructions pass before it tries
// to fold a three-move sequence into V_SWAP_B32. It simply reports the
// GFX9Insts subtarget flag.
bool hasSwap() const {
return GFX9Insts;
}
// Selects the trap handler ABI: the HSA flavour when isAmdHsaOS() holds,
// otherwise none.
TrapHandlerAbi getTrapHandlerAbi() const {
  if (isAmdHsaOS())
    return TrapHandlerAbiHsa;
  return TrapHandlerAbiNone;
}

View File

@ -212,6 +212,169 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
}
}
// Subregister-aware counterpart of MachineInstr::readsRegister /
// MachineInstr::modifiesRegister.
//
// Walks the operand range R and reports whether any register operand touches
// Reg:SubReg:
//  - when both Reg and the operand register are physical, they conflict if
//    TRI.regsOverlap() says so;
//  - when Reg is virtual and the operand names the same register, they
//    conflict only if the lane masks of the two subregister indices intersect.
// Any other combination is treated as "no access".
static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
                          unsigned Reg, unsigned SubReg,
                          const SIRegisterInfo &TRI) {
  for (const MachineOperand &Op : R) {
    if (!Op.isReg())
      continue;

    const unsigned OpReg = Op.getReg();

    // Physical vs. physical: rely on register aliasing information.
    if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
        TargetRegisterInfo::isPhysicalRegister(OpReg)) {
      if (TRI.regsOverlap(Reg, OpReg))
        return true;
      continue;
    }

    // Virtual registers only conflict with themselves, and then only on
    // shared lanes.
    if (OpReg != Reg || !TargetRegisterInfo::isVirtualRegister(Reg))
      continue;

    const LaneBitmask WantedLanes = TRI.getSubRegIndexLaneMask(SubReg);
    const LaneBitmask OperandLanes =
        TRI.getSubRegIndexLaneMask(Op.getSubReg());
    if ((WantedLanes & OperandLanes).any())
      return true;
  }

  return false;
}
static bool instReadsReg(const MachineInstr *MI,
unsigned Reg, unsigned SubReg,
const SIRegisterInfo &TRI) {
return instAccessReg(MI->uses(), Reg, SubReg, TRI);
}
static bool instModifiesReg(const MachineInstr *MI,
unsigned Reg, unsigned SubReg,
const SIRegisterInfo &TRI) {
return instAccessReg(MI->defs(), Reg, SubReg, TRI);
}
// Produces the (register, subregister) pair naming the I-th channel of
// Reg:Sub.
//
//  - A register that is exactly 32 bits wide is returned unchanged.
//  - For a wider physical register, the register itself is replaced by the
//    subregister for channel I; Sub is passed through as given.
//  - For a wider virtual register, the register is kept and the subregister
//    index becomes the channel I counted from the lowest lane set in Sub's
//    lane mask.
//    NOTE(review): this presumes lane-mask bit positions line up with channel
//    numbers -- confirm against SIRegisterInfo.
static TargetInstrInfo::RegSubRegPair
getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
                  const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
  if (TRI.getRegSizeInBits(Reg, MRI) == 32)
    return TargetInstrInfo::RegSubRegPair(Reg, Sub);

  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
    const unsigned ChannelReg =
        TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
    return TargetInstrInfo::RegSubRegPair(ChannelReg, Sub);
  }

  const LaneBitmask Lanes = TRI.getSubRegIndexLaneMask(Sub);
  const unsigned ChannelSub =
      TRI.getSubRegFromChannel(I + countTrailingZeros(Lanes.getAsInteger()));
  return TargetInstrInfo::RegSubRegPair(Reg, ChannelSub);
}
// Match:
//  mov t, x
//  mov x, y
//  mov y, t
//
// =>
//
//  mov t, x (t is potentially dead and move eliminated)
//  v_swap_b32 x, y
//
// MovT is the candidate "mov t, x" (V_MOV_B32_e32 or COPY). Returns the next
// valid instruction pointer if a v_swap_b32 was created, or nullptr if
// nothing was matched and the code was left untouched.
//
// This shall not be done too early not to prevent possible folding which may
// remove matched moves, and this should preferably be done before RA to
// release saved registers and also possibly after RA which can insert copies
// too.
//
// This is really just a generic peephole that is not a canonical shrinking,
// although requirements match the pass placement and it reduces code size too.
static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
                               const SIInstrInfo *TII) {
  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
         MovT.getOpcode() == AMDGPU::COPY);

  unsigned T = MovT.getOperand(0).getReg();
  unsigned Tsub = MovT.getOperand(0).getSubReg();
  MachineOperand &Xop = MovT.getOperand(1);

  // A move of a non-register source cannot be the first move of a swap.
  if (!Xop.isReg())
    return nullptr;
  unsigned X = Xop.getReg();
  unsigned Xsub = Xop.getSubReg();

  // Operand size divided by four; one V_SWAP_B32 is emitted per unit below.
  unsigned Size = TII->getOpSize(MovT, 0) / 4;

  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  // Only VGPR sources are candidates.
  if (!TRI.isVGPR(MRI, X))
    return nullptr;

  // Every use of T is a candidate for the closing "mov y, t".
  for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
    if (YTop.getSubReg() != Tsub)
      continue;

    MachineInstr &MovY = *YTop.getParent();
    // The use we found may be an implicit operand, so verify that the actual
    // source operand of the move is a register and is exactly T:Tsub before
    // touching register-only accessors.
    if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
         MovY.getOpcode() != AMDGPU::COPY) ||
        !MovY.getOperand(1).isReg() ||
        MovY.getOperand(1).getReg() != T ||
        MovY.getOperand(1).getSubReg() != Tsub)
      continue;

    unsigned Y = MovY.getOperand(0).getReg();
    unsigned Ysub = MovY.getOperand(0).getSubReg();

    // Both ends of the swap must be VGPRs and live in the same block.
    if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
      continue;

    // Scan the instructions strictly between MovT and MovY looking for the
    // single "mov x, y", rejecting anything that would make the rewrite
    // unsafe.
    MachineInstr *MovX = nullptr;
    auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
    for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
      // Bail out on any read of X, any write to Y or T, and any write to X
      // once the "mov x, y" has already been found.
      if (instReadsReg(&*I, X, Xsub, TRI) ||
          instModifiesReg(&*I, Y, Ysub, TRI) ||
          instModifiesReg(&*I, T, Tsub, TRI) ||
          (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
        MovX = nullptr;
        break;
      }

      if (!instReadsReg(&*I, Y, Ysub, TRI)) {
        // An unrelated instruction is fine unless it clobbers X before the
        // "mov x, y" has been seen.
        if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
          MovX = nullptr;
          break;
        }
        continue;
      }

      // This instruction reads Y: it must be the one and only "mov x, y".
      if (MovX ||
          (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
           I->getOpcode() != AMDGPU::COPY) ||
          I->getOperand(0).getReg() != X ||
          I->getOperand(0).getSubReg() != Xsub) {
        MovX = nullptr;
        break;
      }
      MovX = &*I;
    }

    // No usable "mov x, y", or MovY was never reached after MovT in the
    // block.
    if (!MovX || I == E)
      continue;

    LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);

    // Emit one 32-bit swap per unit of Size, placed where "mov x, y" was.
    for (unsigned Chan = 0; Chan < Size; ++Chan) {
      TargetInstrInfo::RegSubRegPair X1, Y1;
      X1 = getSubRegForIndex(X, Xsub, Chan, TRI, MRI);
      Y1 = getSubRegForIndex(Y, Ysub, Chan, TRI, MRI);
      BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
              TII->get(AMDGPU::V_SWAP_B32))
        .addDef(X1.Reg, 0, X1.SubReg)
        .addDef(Y1.Reg, 0, Y1.SubReg)
        .addReg(Y1.Reg, 0, Y1.SubReg)
        .addReg(X1.Reg, 0, X1.SubReg);
    }
    MovX->eraseFromParent();
    MovY.eraseFromParent();

    MachineInstr *Next = &*std::next(MovT.getIterator());
    if (MRI.use_nodbg_empty(T))
      MovT.eraseFromParent();
    else
      // T is still used and X is now read by the swap that follows, so the
      // kill flag on X in "mov t, x" is cleared.
      Xop.setIsKill(false);

    return Next;
  }

  return nullptr;
}
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@ -252,6 +415,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
}
if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
MI.getOpcode() == AMDGPU::COPY)) {
if (auto *NextMI = matchSwap(MI, MRI, TII)) {
Next = NextMI->getIterator();
continue;
}
}
// Combine adjacent s_nops to use the immediate operand encoding how long
// to wait.
//

View File

@ -0,0 +1,564 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# GCN-LABEL: name: swap_phys_condensed
# GCN: bb.0:
# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
# GCN-NEXT: S_SETPC_B64_return
---
name: swap_phys_condensed
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...
# GCN-LABEL: name: swap_phys_sparse
# GCN: bb.0:
# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
# GCN-NEXT: S_SETPC_B64_return
---
name: swap_phys_sparse
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
$vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
$vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...
# GCN-LABEL: name: swap_phys_liveout
# GCN: bb.0:
# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
# GCN-NEXT: S_SETPC_B64_return
---
name: swap_phys_liveout
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr2, implicit $vgpr1
...
# GCN-LABEL: name: swap_phys_b64
# GCN: bb.0:
# GCN-NEXT: $vgpr0, $vgpr2 = V_SWAP_B32 $vgpr2, $vgpr0, implicit $exec
# GCN-NEXT: $vgpr1, $vgpr3 = V_SWAP_B32 $vgpr3, $vgpr1, implicit $exec
---
name: swap_phys_b64
body: |
bb.0:
$vgpr4_vgpr5 = COPY killed $vgpr0_vgpr1
$vgpr0_vgpr1 = COPY killed $vgpr2_vgpr3
$vgpr2_vgpr3 = COPY killed $vgpr4_vgpr5
...
# GCN-LABEL: name: swap_phys_overlap_x
# GCN: bb.0:
# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
# GCN-NEXT: $vgpr3_vgpr4 = V_ADD_F64 0, $vgpr0_vgpr1, 0, $vgpr3_vgpr4, 0, 0, implicit $exec
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
---
name: swap_phys_overlap_x
body: |
bb.0:
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr3_vgpr4 = V_ADD_F64 0, $vgpr0_vgpr1, 0, $vgpr3_vgpr4, 0, 0, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
...
# GCN-LABEL: name: swap_phys_clobber_y
# GCN: bb.0:
# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
---
name: swap_phys_clobber_y
body: |
bb.0:
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
S_ENDPGM
...
# GCN-LABEL: name: swap_virt_copy_condense
# GCN: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
---
name: swap_virt_copy_condense
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = COPY %0
%0 = COPY %1
%1 = COPY %2
...
# GCN-LABEL: name: swap_virt_copy_sparse
# GCN: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
---
name: swap_virt_copy_sparse
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = COPY %0
S_NOP 0
%0 = COPY %1
S_NOP 0
%1 = COPY %2
...
# GCN-LABEL: name: swap_virt_copy_subreg
# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
---
name: swap_virt_copy_subreg
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_64 }
- { id: 2, class: vreg_64 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2.sub0 = COPY %0.sub0
%2.sub1 = COPY %0.sub1
%0.sub0 = COPY %1.sub0
%0.sub1 = COPY %1.sub1
%1.sub0 = COPY %2.sub0
...
# GCN-LABEL: name: swap_virt_mov
# GCN: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
---
name: swap_virt_mov
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = V_MOV_B32_e32 %0, implicit $exec
%0 = V_MOV_B32_e32 %1, implicit $exec
%1 = V_MOV_B32_e32 %2, implicit $exec
...
# GCN-LABEL: name: swap_virt_read_x
# GCN: bb.0:
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %2:vgpr_32 = COPY %0
# GCN-NEXT: %3:vgpr_32 = COPY %0
# GCN-NEXT: %0:vgpr_32 = COPY %1
# GCN-NEXT: %1:vgpr_32 = COPY %2
# GCN-NEXT: S_ENDPGM
---
name: swap_virt_read_x
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = COPY %0
%3 = COPY %0
%0 = COPY %1
%1 = COPY %2
S_ENDPGM
...
# GCN-LABEL: name: swap_virt_read_t_twice
# GCN: bb.0:
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %2:vgpr_32 = COPY %0
# GCN-NEXT: %3:vgpr_32 = COPY %2
# GCN-NEXT: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
# GCN-NEXT: S_ENDPGM
---
name: swap_virt_read_t_twice
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = COPY %0
%3 = COPY %2
%0 = COPY %1
%1 = COPY %2
S_ENDPGM
...
# GCN-LABEL: name: swap_virt_clobber_y
# GCN: bb.0:
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %2:vgpr_32 = COPY %0
# GCN-NEXT: %0:vgpr_32 = COPY %1
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = COPY %2
# GCN-NEXT: S_ENDPGM
---
name: swap_virt_clobber_y
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = COPY %0
%0 = COPY %1
%1 = IMPLICIT_DEF
%1 = COPY %2
S_ENDPGM
...
# GCN-LABEL: name: swap_virt_clobber_x1
# GCN: bb.0:
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %2:vgpr_32 = COPY %0
# GCN-NEXT: %0:vgpr_32 = COPY %1
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = COPY %2
# GCN-NEXT: S_ENDPGM
---
name: swap_virt_clobber_x1
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = COPY %0
%0 = COPY %1
%0 = IMPLICIT_DEF
%1 = COPY %2
S_ENDPGM
...
# GCN-LABEL: name: swap_virt_clobber_x2
# GCN: bb.0:
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %2:vgpr_32 = COPY %0
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %0:vgpr_32 = COPY %1
# GCN-NEXT: %1:vgpr_32 = COPY %2
# GCN-NEXT: S_ENDPGM
---
name: swap_virt_clobber_x2
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = COPY %0
%0 = IMPLICIT_DEF
%0 = COPY %1
%1 = COPY %2
S_ENDPGM
...
# GCN-LABEL: name: swap_virt_clobber_t
# GCN: bb.0:
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %2:vgpr_32 = COPY %0
# GCN-NEXT: %0:vgpr_32 = COPY %1
# GCN-NEXT: %2:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = COPY %2
# GCN-NEXT: S_ENDPGM
---
name: swap_virt_clobber_t
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = COPY %0
%0 = COPY %1
%2 = IMPLICIT_DEF
%1 = COPY %2
S_ENDPGM
...
# GCN-LABEL: name: swap_virt_copy_subreg_overlap_x_full
# GCN: bb.0:
# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0
# GCN-NEXT: %3:vreg_64 = COPY %0
# GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0
# GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0
---
name: swap_virt_copy_subreg_overlap_x_full
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_64 }
- { id: 2, class: vreg_64 }
- { id: 3, class: vreg_64 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2.sub0 = COPY %0.sub0
%3 = COPY %0
%0.sub0 = COPY %1.sub0
%1.sub0 = COPY %2.sub0
...
# GCN-LABEL: name: swap_virt_copy_subreg_overlap_x_part
# GCN: bb.0:
# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF
# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0
# GCN-NEXT: %3:vreg_64 = COPY %0.sub0_sub1
# GCN-NEXT: %0.sub0:vreg_128 = COPY %1.sub0
# GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0
---
name: swap_virt_copy_subreg_overlap_x_part
registers:
- { id: 0, class: vreg_128 }
- { id: 1, class: vreg_64 }
- { id: 2, class: vreg_64 }
- { id: 3, class: vreg_64 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2.sub0 = COPY %0.sub0
%3 = COPY %0.sub0_sub1
%0.sub0 = COPY %1.sub0
%1.sub0 = COPY %2.sub0
...
# GCN-LABEL: name: swap_virt_copy_subreg_wide_y
# GCN: bb.0:
# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0
# GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0
# GCN-NEXT: %1:vreg_64 = COPY %2
---
name: swap_virt_copy_subreg_wide_y
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_64 }
- { id: 2, class: vreg_64 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2.sub0 = COPY %0.sub0
%0.sub0 = COPY %1.sub0
%1 = COPY %2
...
# GCN-LABEL: name: swap_virt_b64
# GCN: bb.0:
# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
# GCN-NEXT: %0.sub1:vreg_64, %1.sub1:vreg_64 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec
---
name: swap_virt_b64
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_64 }
- { id: 2, class: vreg_64 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = COPY %0
%0 = COPY %1
%1 = COPY %2
...
# GCN-LABEL: name: swap_virt_b128
# GCN: bb.0:
# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF
# GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF
# GCN-NEXT: %0.sub0:vreg_128, %1.sub0:vreg_128 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
# GCN-NEXT: %0.sub1:vreg_128, %1.sub1:vreg_128 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec
# GCN-NEXT: %0.sub2:vreg_128, %1.sub2:vreg_128 = V_SWAP_B32 %1.sub2, %0.sub2, implicit $exec
# GCN-NEXT: %0.sub3:vreg_128, %1.sub3:vreg_128 = V_SWAP_B32 %1.sub3, %0.sub3, implicit $exec
---
name: swap_virt_b128
registers:
- { id: 0, class: vreg_128 }
- { id: 1, class: vreg_128 }
- { id: 2, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = COPY %0
%0 = COPY %1
%1 = COPY %2
...
# GCN-LABEL: name: swap_virt_b128_sub0_1
# GCN: bb.0:
# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF
# GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF
# GCN-NEXT: %0.sub0:vreg_128, %1.sub0:vreg_128 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
# GCN-NEXT: %0.sub1:vreg_128, %1.sub1:vreg_128 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec
# GCN-NEXT: S_ENDPGM
---
name: swap_virt_b128_sub0_1
registers:
- { id: 0, class: vreg_128 }
- { id: 1, class: vreg_128 }
- { id: 2, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2.sub0_sub1 = COPY %0.sub0_sub1
%0.sub0_sub1 = COPY %1.sub0_sub1
%1.sub0_sub1 = COPY %2.sub0_sub1
S_ENDPGM
...
# GCN-LABEL: name: swap_virt_b128_sub2_3
# GCN: bb.0:
# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF
# GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF
# GCN-NEXT: %0.sub2:vreg_128, %1.sub2:vreg_128 = V_SWAP_B32 %1.sub2, %0.sub2, implicit $exec
# GCN-NEXT: %0.sub3:vreg_128, %1.sub3:vreg_128 = V_SWAP_B32 %1.sub3, %0.sub3, implicit $exec
# GCN-NEXT: S_ENDPGM
---
name: swap_virt_b128_sub2_3
registers:
- { id: 0, class: vreg_128 }
- { id: 1, class: vreg_128 }
- { id: 2, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2.sub2_sub3 = COPY %0.sub2_sub3
%0.sub2_sub3 = COPY %1.sub2_sub3
%1.sub2_sub3 = COPY %2.sub2_sub3
S_ENDPGM
...
# GCN-LABEL: name: swap_virt_s_to_s
# GCN: bb.0:
# GCN-NEXT: %0:sgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %1:sgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %2:sgpr_32 = COPY %0
# GCN-NEXT: %0:sgpr_32 = COPY %1
# GCN-NEXT: %1:sgpr_32 = COPY %2
---
name: swap_virt_s_to_s
registers:
- { id: 0, class: sgpr_32 }
- { id: 1, class: sgpr_32 }
- { id: 2, class: sgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = COPY %0
%0 = COPY %1
%1 = COPY %2
...
# GCN-LABEL: name: swap_virt_copy_subreg_impdef_super
# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
---
name: swap_virt_copy_subreg_impdef_super
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_64 }
- { id: 2, class: vreg_64 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2.sub0 = COPY %0.sub0, implicit-def %2, implicit $exec
%2.sub1 = COPY %0.sub1
%0.sub0 = COPY %1.sub0
%0.sub1 = COPY %1.sub1
%1.sub0 = COPY %2.sub0
...
# GCN-LABEL: name: swap_virt_copy_subreg_impuse_x
# GCN: bb.0:
# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0
# GCN-NEXT: %2.sub1:vreg_64 = COPY %0.sub1
# GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0, implicit %0
# GCN-NEXT: %0.sub1:vreg_64 = COPY %1.sub1
# GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0
# GCN-NEXT: S_ENDPGM
---
name: swap_virt_copy_subreg_impuse_x
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_64 }
- { id: 2, class: vreg_64 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2.sub0 = COPY %0.sub0
%2.sub1 = COPY %0.sub1
%0.sub0 = COPY %1.sub0, implicit %0
%0.sub1 = COPY %1.sub1
%1.sub0 = COPY %2.sub0
S_ENDPGM
...