forked from OSchip/llvm-project
AMDGPU: Fix folding immediate into readfirstlane through reg_sequence
The def instruction for the vreg may not match, because the fold may be going through a reg_sequence. The assert was overly conservative and unnecessary: it does not actually matter whether DefMI really defined the register, because the fold that will be done only cares about the def of the value being folded. For some reason copies aren't making it through the reg_sequence, although they should. llvm-svn: 363876
This commit is contained in:
parent
f8104f01e6
commit
4d000d2488
llvm
lib/Target/AMDGPU
test/CodeGen/AMDGPU
|
@ -436,9 +436,11 @@ void SIFoldOperands::foldOperand(
|
||||||
unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
|
unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
|
||||||
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
|
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
|
||||||
|
|
||||||
|
MachineRegisterInfo::use_iterator Next;
|
||||||
for (MachineRegisterInfo::use_iterator
|
for (MachineRegisterInfo::use_iterator
|
||||||
RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
|
RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
|
||||||
RSUse != RSE; ++RSUse) {
|
RSUse != RSE; RSUse = Next) {
|
||||||
|
Next = std::next(RSUse);
|
||||||
|
|
||||||
MachineInstr *RSUseMI = RSUse->getParent();
|
MachineInstr *RSUseMI = RSUse->getParent();
|
||||||
if (RSUse->getSubReg() != RegSeqDstSubReg)
|
if (RSUse->getSubReg() != RegSeqDstSubReg)
|
||||||
|
@ -523,6 +525,9 @@ void SIFoldOperands::foldOperand(
|
||||||
return;
|
return;
|
||||||
|
|
||||||
UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
|
UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
|
||||||
|
|
||||||
|
// FIXME: ChangeToImmediate should clear subreg
|
||||||
|
UseMI->getOperand(1).setSubReg(0);
|
||||||
UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
|
UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
|
||||||
UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
|
UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -6079,7 +6079,6 @@ bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
|
||||||
const MachineInstr &DefMI,
|
const MachineInstr &DefMI,
|
||||||
const MachineInstr *UseMI) {
|
const MachineInstr *UseMI) {
|
||||||
assert(MRI.isSSA() && "Must be run on SSA");
|
assert(MRI.isSSA() && "Must be run on SSA");
|
||||||
assert(DefMI.definesRegister(VReg) && "wrong def instruction");
|
|
||||||
|
|
||||||
auto *TRI = MRI.getTargetRegisterInfo();
|
auto *TRI = MRI.getTargetRegisterInfo();
|
||||||
auto *DefBB = DefMI.getParent();
|
auto *DefBB = DefMI.getParent();
|
||||||
|
|
|
@ -279,12 +279,24 @@ define amdgpu_vs float @load_addr_no_fold(i32 addrspace(6)* inreg noalias %p0) #
|
||||||
ret float %r2
|
ret float %r2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: {{^}}vgpr_arg_src:
|
||||||
|
; CHECK: v_readfirstlane_b32 s[[READLANE:[0-9]+]], v0
|
||||||
|
; CHECK: s_mov_b32 s[[ZERO:[0-9]+]]
|
||||||
|
; CHECK: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[READLANE]]:[[ZERO]]{{\]}}
|
||||||
|
define amdgpu_vs float @vgpr_arg_src(<4 x i32> addrspace(6)* %arg) {
|
||||||
|
main_body:
|
||||||
|
%tmp9 = load <4 x i32>, <4 x i32> addrspace(6)* %arg
|
||||||
|
%tmp10 = call nsz float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %tmp9, i32 undef, i32 0, i32 0, i32 0) #1
|
||||||
|
ret float %tmp10
|
||||||
|
}
|
||||||
|
|
||||||
; Function Attrs: nounwind readnone speculatable
|
; Function Attrs: nounwind readnone speculatable
|
||||||
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
|
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
|
||||||
|
|
||||||
; Function Attrs: nounwind readonly
|
; Function Attrs: nounwind readonly
|
||||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #7
|
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #7
|
||||||
|
|
||||||
|
declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #7
|
||||||
|
|
||||||
!0 = !{}
|
!0 = !{}
|
||||||
|
|
||||||
|
|
|
@ -248,3 +248,126 @@ body: |
|
||||||
%1:sreg_32_xm0 = S_MOV_B32 12
|
%1:sreg_32_xm0 = S_MOV_B32 12
|
||||||
%2:sreg_32_xm0 = V_READLANE_B32 %0, %1, implicit $exec
|
%2:sreg_32_xm0 = V_READLANE_B32 %0, %1, implicit $exec
|
||||||
...
|
...
|
||||||
|
|
||||||
|
# Constant for subreg0
|
||||||
|
# GCN-LABEL: name: fold-imm-readfirstlane-regsequence0{{$}}
|
||||||
|
|
||||||
|
# GCN: %0:vgpr_32 = COPY $vgpr0
|
||||||
|
# GCN-NEXT: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||||
|
# GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, killed %1, %subreg.sub1
|
||||||
|
# GCN-NEXT: %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0, implicit $exec
|
||||||
|
# GCN-NEXT: %4:sgpr_32 = S_MOV_B32 0
|
||||||
|
---
|
||||||
|
name: fold-imm-readfirstlane-regsequence0
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0
|
||||||
|
%0:vgpr_32 = COPY $vgpr0
|
||||||
|
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||||
|
%2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, killed %1:vgpr_32, %subreg.sub1
|
||||||
|
%3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec
|
||||||
|
%4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1:vreg_64, implicit $exec
|
||||||
|
...
|
||||||
|
|
||||||
|
# Constant for subreg1
|
||||||
|
# GCN-LABEL: name: fold-imm-readfirstlane-regsequence1{{$}}
|
||||||
|
# GCN: %0:vgpr_32 = COPY $vgpr0
|
||||||
|
# GCN-NEXT: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||||
|
# GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, killed %0, %subreg.sub1
|
||||||
|
# GCN-NEXT: %3:sgpr_32 = S_MOV_B32 0
|
||||||
|
# GCN-NEXT: %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1, implicit $exec
|
||||||
|
|
||||||
|
---
|
||||||
|
name: fold-imm-readfirstlane-regsequence1
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0
|
||||||
|
%0:vgpr_32 = COPY $vgpr0
|
||||||
|
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||||
|
%2:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, killed %0:vgpr_32, %subreg.sub1
|
||||||
|
%3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec
|
||||||
|
%4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1:vreg_64, implicit $exec
|
||||||
|
...
|
||||||
|
|
||||||
|
# Different constant regs for each subreg
|
||||||
|
# GCN-LABEL: name: fold-imm-readfirstlane-regsequence2{{$}}
|
||||||
|
# GCN: %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||||
|
# GCN-NEXT: %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||||
|
# GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, killed %1, %subreg.sub1
|
||||||
|
# GCN-NEXT: %3:sgpr_32 = S_MOV_B32 0
|
||||||
|
# GCN-NEXT: %4:sgpr_32 = S_MOV_B32 1
|
||||||
|
---
|
||||||
|
name: fold-imm-readfirstlane-regsequence2
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||||
|
%1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||||
|
%2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, killed %1:vgpr_32, %subreg.sub1
|
||||||
|
%3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec
|
||||||
|
%4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1:vreg_64, implicit $exec
|
||||||
|
...
|
||||||
|
|
||||||
|
# Same constant reg for each subreg, so there are multiple constant uses
|
||||||
|
# GCN-LABEL: name: fold-imm-readfirstlane-regsequence3{{$}}
|
||||||
|
# GCN: %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||||
|
# GCN-NEXT: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||||
|
# GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, killed %1, %subreg.sub1
|
||||||
|
# GCN-NEXT: %3:sgpr_32 = S_MOV_B32 0
|
||||||
|
# GCN-NEXT: %4:sgpr_32 = S_MOV_B32 0
|
||||||
|
---
|
||||||
|
name: fold-imm-readfirstlane-regsequence3
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||||
|
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||||
|
%2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, killed %1:vgpr_32, %subreg.sub1
|
||||||
|
%3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec
|
||||||
|
%4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1:vreg_64, implicit $exec
|
||||||
|
...
|
||||||
|
|
||||||
|
# FIXME: This should fold
|
||||||
|
# GCN-LABEL: name: fold-copy-readfirstlane-regsequence0{{$}}
|
||||||
|
# GCN: %0:vgpr_32 = COPY $sgpr10
|
||||||
|
# GCN-NEXT: %1:vgpr_32 = COPY $sgpr11
|
||||||
|
# GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, killed %1, %subreg.sub1
|
||||||
|
# GCN-NEXT: %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0, implicit $exec
|
||||||
|
# GCN-NEXT: %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1, implicit $exec
|
||||||
|
---
|
||||||
|
name: fold-copy-readfirstlane-regsequence0
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr10, $sgpr11
|
||||||
|
%0:vgpr_32 = COPY $sgpr10
|
||||||
|
%1:vgpr_32 = COPY $sgpr11
|
||||||
|
%2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, killed %1:vgpr_32, %subreg.sub1
|
||||||
|
%3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec
|
||||||
|
%4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1:vreg_64, implicit $exec
|
||||||
|
...
|
||||||
|
|
||||||
|
# GCN-LABEL: name: fold-copy-readfirstlane-regsequence1{{$}}
|
||||||
|
# GCN: %0:sreg_32_xm0 = COPY $sgpr10
|
||||||
|
# GCN-NEXT: %1:sreg_32_xm0 = COPY $sgpr11
|
||||||
|
# GCN-NEXT: %2:vgpr_32 = COPY %0
|
||||||
|
# GCN-NEXT: %3:vgpr_32 = COPY %1
|
||||||
|
# GCN-NEXT: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, killed %3, %subreg.sub1
|
||||||
|
# GCN-NEXT: %5:sgpr_32 = V_READFIRSTLANE_B32 %4.sub0, implicit $exec
|
||||||
|
# GCN-NEXT: %6:sgpr_32 = V_READFIRSTLANE_B32 %4.sub1, implicit $exec
|
||||||
|
---
|
||||||
|
name: fold-copy-readfirstlane-regsequence1
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr10, $sgpr11
|
||||||
|
%0:sreg_32_xm0 = COPY $sgpr10
|
||||||
|
%1:sreg_32_xm0 = COPY $sgpr11
|
||||||
|
%2:vgpr_32 = COPY %0
|
||||||
|
%3:vgpr_32 = COPY %1
|
||||||
|
%4:vreg_64 = REG_SEQUENCE %2:vgpr_32, %subreg.sub0, killed %3:vgpr_32, %subreg.sub1
|
||||||
|
%5:sgpr_32 = V_READFIRSTLANE_B32 %4.sub0:vreg_64, implicit $exec
|
||||||
|
%6:sgpr_32 = V_READFIRSTLANE_B32 %4.sub1:vreg_64, implicit $exec
|
||||||
|
...
|
||||||
|
|
Loading…
Reference in New Issue