forked from OSchip/llvm-project
[AMDGPU] Fix multiple vreg definitions in si-lower-control-flow
Differential Revision: https://reviews.llvm.org/D26939
llvm-svn: 287608
This commit is contained in:
parent
bb536fee32
commit
ae0f6620e4
|
@ -141,8 +141,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
|
|||
|
||||
// Add an implicit def of exec to discourage scheduling VALU after this which
|
||||
// will interfere with trying to form s_and_saveexec_b64 later.
|
||||
unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||
MachineInstr *CopyExec =
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SaveExecReg)
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)
|
||||
.addReg(AMDGPU::EXEC)
|
||||
.addReg(AMDGPU::EXEC, RegState::ImplicitDefine);
|
||||
|
||||
|
@ -150,7 +151,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
|
|||
|
||||
MachineInstr *And =
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), Tmp)
|
||||
.addReg(SaveExecReg)
|
||||
.addReg(CopyReg)
|
||||
//.addReg(AMDGPU::EXEC)
|
||||
.addReg(Cond.getReg());
|
||||
setImpSCCDefDead(*And, true);
|
||||
|
@ -158,7 +159,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
|
|||
MachineInstr *Xor =
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg)
|
||||
.addReg(Tmp)
|
||||
.addReg(SaveExecReg);
|
||||
.addReg(CopyReg);
|
||||
setImpSCCDefDead(*Xor, ImpDefSCC.isDead());
|
||||
|
||||
// Use a copy that is a terminator to get correct spill code placement with
|
||||
|
@ -197,6 +198,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
|
|||
LIS->removeInterval(SaveExecReg);
|
||||
LIS->createAndComputeVirtRegInterval(SaveExecReg);
|
||||
LIS->createAndComputeVirtRegInterval(Tmp);
|
||||
LIS->createAndComputeVirtRegInterval(CopyReg);
|
||||
}
|
||||
|
||||
void SILowerControlFlow::emitElse(MachineInstr &MI) {
|
||||
|
@ -212,14 +214,17 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
|
|||
// We are running before TwoAddressInstructions, and si_else's operands are
|
||||
// tied. In order to correctly tie the registers, split this into a copy of
|
||||
// the src like it does.
|
||||
BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), DstReg)
|
||||
unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||
BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
|
||||
.addOperand(MI.getOperand(1)); // Saved EXEC
|
||||
|
||||
// This must be inserted before phis and any spill code inserted before the
|
||||
// else.
|
||||
unsigned SaveReg = ExecModified ?
|
||||
MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass) : DstReg;
|
||||
MachineInstr *OrSaveExec =
|
||||
BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), DstReg)
|
||||
.addReg(DstReg);
|
||||
BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), SaveReg)
|
||||
.addReg(CopyReg);
|
||||
|
||||
MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
|
||||
|
||||
|
@ -229,7 +234,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
|
|||
MachineInstr *And =
|
||||
BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_AND_B64), DstReg)
|
||||
.addReg(AMDGPU::EXEC)
|
||||
.addReg(DstReg);
|
||||
.addReg(SaveReg);
|
||||
|
||||
if (LIS)
|
||||
LIS->InsertMachineInstrInMaps(*And);
|
||||
|
@ -260,6 +265,9 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
|
|||
// src reg is tied to dst reg.
|
||||
LIS->removeInterval(DstReg);
|
||||
LIS->createAndComputeVirtRegInterval(DstReg);
|
||||
LIS->createAndComputeVirtRegInterval(CopyReg);
|
||||
if (ExecModified)
|
||||
LIS->createAndComputeVirtRegInterval(SaveReg);
|
||||
|
||||
// Let this be recomputed.
|
||||
LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
; GCN-DAG: v_cmp_lt_f32_e32 vcc,
|
||||
; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[OTHERCC]]
|
||||
; GCN: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[AND]]
|
||||
; GCN: s_xor_b64 [[SAVED]], exec, [[SAVED]]
|
||||
; GCN: s_xor_b64 {{s\[[0-9]+:[0-9]+\]}}, exec, [[SAVED]]
|
||||
;
|
||||
; TODO: The following sequence is a bug (missing s_endpgm)!
|
||||
;
|
||||
|
|
|
@ -9,8 +9,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
|||
|
||||
; waitcnt should be inserted after exec modification
|
||||
; SI: v_cmp_lt_i32_e32 vcc, 0,
|
||||
; SI-NEXT: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI-NEXT: s_xor_b64 [[SAVE]], exec, [[SAVE]]
|
||||
; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]]
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: ; mask branch [[FLOW_BB:BB[0-9]+_[0-9]+]]
|
||||
; SI-NEXT: s_cbranch_execz [[FLOW_BB]]
|
||||
|
@ -24,9 +24,9 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
|||
|
||||
; v_mov should be after exec modification
|
||||
; SI: [[FLOW_BB]]:
|
||||
; SI-NEXT: s_or_saveexec_b64 [[SAVE]], [[SAVE]]
|
||||
; SI-NEXT: s_or_saveexec_b64 [[SAVE3:s\[[0-9]+:[0-9]+\]]], [[SAVE2]]
|
||||
; SI-NEXT: v_mov_b32_e32 v{{[0-9]+}}
|
||||
; SI-NEXT: s_xor_b64 exec, exec, [[SAVE]]
|
||||
; SI-NEXT: s_xor_b64 exec, exec, [[SAVE3]]
|
||||
; SI-NEXT: ; mask branch
|
||||
;
|
||||
define void @test_if(i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
|
||||
|
@ -152,7 +152,7 @@ exit:
|
|||
; SI-DAG: v_cmp_ne_u32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
|
||||
; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
|
||||
; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
|
||||
; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]]
|
||||
; SI: s_xor_b64 [[ORNEG3:s\[[0-9]+:[0-9]+\]]], exec, [[ORNEG2]]
|
||||
; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
|
||||
|
@ -162,8 +162,8 @@ exit:
|
|||
|
||||
; SI: [[LABEL_FLOW]]:
|
||||
; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]]
|
||||
; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]]
|
||||
; SI-NEXT: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[TMP]]
|
||||
; SI-NEXT: s_or_b64 exec, exec, [[ORNEG3]]
|
||||
; SI-NEXT: s_or_b64 [[COND_STATE]], [[ORNEG3]], [[TMP]]
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]]
|
||||
; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]]
|
||||
|
||||
|
|
Loading…
Reference in New Issue