forked from OSchip/llvm-project
[AMDGPU] isRenamable fixes to support copy forwarding
Mark more opcodes as hasExtraSrcRegAllocReq so that their source operands are marked as not renamable. This prevents copy forwarding from rewriting those operands in a way that violates the constraint that only one operand may use the constant bus. These changes fix a few miscompiles that occur when copy forwarding is enabled in MachineCopyPropagation by D41835 (they were reviewed as part of that change). llvm-svn: 323794
This commit is contained in:
parent
c9265e81f4
commit
1d53101387
|
@ -262,14 +262,15 @@ void SIInsertSkips::kill(MachineInstr &MI) {
|
||||||
|
|
||||||
assert(MI.getOperand(0).isReg());
|
assert(MI.getOperand(0).isReg());
|
||||||
|
|
||||||
|
MachineInstr *NewMI;
|
||||||
if (TRI->isVGPR(MBB.getParent()->getRegInfo(),
|
if (TRI->isVGPR(MBB.getParent()->getRegInfo(),
|
||||||
MI.getOperand(0).getReg())) {
|
MI.getOperand(0).getReg())) {
|
||||||
Opcode = AMDGPU::getVOPe32(Opcode);
|
Opcode = AMDGPU::getVOPe32(Opcode);
|
||||||
BuildMI(MBB, &MI, DL, TII->get(Opcode))
|
NewMI = BuildMI(MBB, &MI, DL, TII->get(Opcode))
|
||||||
.add(MI.getOperand(1))
|
.add(MI.getOperand(1))
|
||||||
.add(MI.getOperand(0));
|
.add(MI.getOperand(0));
|
||||||
} else {
|
} else {
|
||||||
BuildMI(MBB, &MI, DL, TII->get(Opcode))
|
NewMI = BuildMI(MBB, &MI, DL, TII->get(Opcode))
|
||||||
.addReg(AMDGPU::VCC, RegState::Define)
|
.addReg(AMDGPU::VCC, RegState::Define)
|
||||||
.addImm(0) // src0 modifiers
|
.addImm(0) // src0 modifiers
|
||||||
.add(MI.getOperand(1))
|
.add(MI.getOperand(1))
|
||||||
|
@ -277,6 +278,11 @@ void SIInsertSkips::kill(MachineInstr &MI) {
|
||||||
.add(MI.getOperand(0))
|
.add(MI.getOperand(0))
|
||||||
.addImm(0); // omod
|
.addImm(0); // omod
|
||||||
}
|
}
|
||||||
|
// Clear isRenamable bit if new opcode requires it to be 0.
|
||||||
|
if (NewMI->hasExtraSrcRegAllocReq())
|
||||||
|
for (MachineOperand &NewMO : NewMI->uses())
|
||||||
|
if (NewMO.isReg() && NewMO.isUse())
|
||||||
|
NewMO.setIsRenamable(false);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case AMDGPU::SI_KILL_I1_TERMINATOR: {
|
case AMDGPU::SI_KILL_I1_TERMINATOR: {
|
||||||
|
|
|
@ -203,6 +203,8 @@ class VPseudoInstSI<dag outs, dag ins, list<dag> pattern = [], string asm = "">
|
||||||
: PseudoInstSI<outs, ins, pattern, asm> {
|
: PseudoInstSI<outs, ins, pattern, asm> {
|
||||||
let VALU = 1;
|
let VALU = 1;
|
||||||
let Uses = [EXEC];
|
let Uses = [EXEC];
|
||||||
|
// Avoid changing source registers in a way that violates constant bus read limitations.
|
||||||
|
let hasExtraSrcRegAllocReq = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
class CFPseudoInstSI<dag outs, dag ins, list<dag> pattern = [],
|
class CFPseudoInstSI<dag outs, dag ins, list<dag> pattern = [],
|
||||||
|
|
|
@ -355,14 +355,12 @@ def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPU
|
||||||
|
|
||||||
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
|
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
|
||||||
let SchedRW = [WriteFloatFMA, WriteSALU];
|
let SchedRW = [WriteFloatFMA, WriteSALU];
|
||||||
let hasExtraSrcRegAllocReq = 1;
|
|
||||||
let AsmMatchConverter = "";
|
let AsmMatchConverter = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
// Double precision division pre-scale.
|
// Double precision division pre-scale.
|
||||||
def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
|
def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
|
||||||
let SchedRW = [WriteDouble, WriteSALU];
|
let SchedRW = [WriteDouble, WriteSALU];
|
||||||
let hasExtraSrcRegAllocReq = 1;
|
|
||||||
let AsmMatchConverter = "";
|
let AsmMatchConverter = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -81,6 +81,8 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
|
||||||
let UseNamedOperandTable = 1;
|
let UseNamedOperandTable = 1;
|
||||||
let VOP3_OPSEL = isVop3OpSel;
|
let VOP3_OPSEL = isVop3OpSel;
|
||||||
let IsPacked = P.IsPacked;
|
let IsPacked = P.IsPacked;
|
||||||
|
// Avoid changing source registers in a way that violates constant bus read limitations.
|
||||||
|
let hasExtraSrcRegAllocReq = 1;
|
||||||
|
|
||||||
string Mnemonic = opName;
|
string Mnemonic = opName;
|
||||||
string AsmOperands = !if(isVop3OpSel,
|
string AsmOperands = !if(isVop3OpSel,
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# RUN: llc -march=amdgcn -verify-machineinstrs -start-before si-shrink-instructions -stop-before si-insert-skips -o - %s | FileCheck -check-prefix=GCN %s
|
# RUN: llc -march=amdgcn -verify-machineinstrs -start-before si-shrink-instructions -stop-before si-insert-skips -o - %s | FileCheck -check-prefix=GCN %s
|
||||||
|
|
||||||
# GCN-LABEL: name: subbrev{{$}}
|
# GCN-LABEL: name: subbrev{{$}}
|
||||||
# GCN: V_SUBBREV_U32_e64 0, undef %vgpr0, killed renamable %vcc, implicit %exec
|
# GCN: V_SUBBREV_U32_e64 0, undef %vgpr0, killed %vcc, implicit %exec
|
||||||
|
|
||||||
---
|
---
|
||||||
name: subbrev
|
name: subbrev
|
||||||
|
@ -25,7 +25,7 @@ body: |
|
||||||
...
|
...
|
||||||
|
|
||||||
# GCN-LABEL: name: subb{{$}}
|
# GCN-LABEL: name: subb{{$}}
|
||||||
# GCN: V_SUBB_U32_e64 undef %vgpr0, 0, killed renamable %vcc, implicit %exec
|
# GCN: V_SUBB_U32_e64 undef %vgpr0, 0, killed %vcc, implicit %exec
|
||||||
|
|
||||||
---
|
---
|
||||||
name: subb
|
name: subb
|
||||||
|
@ -49,7 +49,7 @@ body: |
|
||||||
...
|
...
|
||||||
|
|
||||||
# GCN-LABEL: name: addc{{$}}
|
# GCN-LABEL: name: addc{{$}}
|
||||||
# GCN: V_ADDC_U32_e32 0, undef renamable %vgpr0, implicit-def %vcc, implicit killed %vcc, implicit %exec
|
# GCN: V_ADDC_U32_e32 0, undef %vgpr0, implicit-def %vcc, implicit killed %vcc, implicit %exec
|
||||||
|
|
||||||
---
|
---
|
||||||
name: addc
|
name: addc
|
||||||
|
@ -73,7 +73,7 @@ body: |
|
||||||
...
|
...
|
||||||
|
|
||||||
# GCN-LABEL: name: addc2{{$}}
|
# GCN-LABEL: name: addc2{{$}}
|
||||||
# GCN: V_ADDC_U32_e32 0, undef renamable %vgpr0, implicit-def %vcc, implicit killed %vcc, implicit %exec
|
# GCN: V_ADDC_U32_e32 0, undef %vgpr0, implicit-def %vcc, implicit killed %vcc, implicit %exec
|
||||||
|
|
||||||
---
|
---
|
||||||
name: addc2
|
name: addc2
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
# CHECK: - { id: 1, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
|
# CHECK: - { id: 1, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
|
||||||
# CHECK-NEXT: stack-id: 1,
|
# CHECK-NEXT: stack-id: 1,
|
||||||
|
|
||||||
# CHECK: SI_SPILL_V32_SAVE killed renamable %vgpr0, %stack.0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr5, 0, implicit %exec :: (store 4 into %stack.0)
|
# CHECK: SI_SPILL_V32_SAVE killed %vgpr0, %stack.0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr5, 0, implicit %exec :: (store 4 into %stack.0)
|
||||||
# CHECK: %vgpr0 = SI_SPILL_V32_RESTORE %stack.0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr5, 0, implicit %exec :: (load 4 from %stack.0)
|
# CHECK: %vgpr0 = SI_SPILL_V32_RESTORE %stack.0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr5, 0, implicit %exec :: (load 4 from %stack.0)
|
||||||
|
|
||||||
# CHECK: SI_SPILL_S32_SAVE killed renamable %sgpr6, %stack.1, implicit %exec, implicit %sgpr0_sgpr1_sgpr2_sgpr3, implicit %sgpr5, implicit-def dead %m0 :: (store 4 into %stack.1)
|
# CHECK: SI_SPILL_S32_SAVE killed renamable %sgpr6, %stack.1, implicit %exec, implicit %sgpr0_sgpr1_sgpr2_sgpr3, implicit %sgpr5, implicit-def dead %m0 :: (store 4 into %stack.1)
|
||||||
|
|
Loading…
Reference in New Issue