AMDGPU: Fix violating constant bus restriction

You can't use madak/madmk if the instruction already uses an SGPR input.

llvm-svn: 313298
This commit is contained in:
Matt Arsenault 2017-09-14 20:54:29 +00:00
parent 312ccf761c
commit c317287fde
2 changed files with 27 additions and 4 deletions

View File

@ -2150,9 +2150,8 @@ static int64_t getFoldableImm(const MachineOperand* MO) {
const MachineFunction *MF = MO->getParent()->getParent()->getParent();
const MachineRegisterInfo &MRI = MF->getRegInfo();
auto Def = MRI.getUniqueVRegDef(MO->getReg());
if (Def && (Def->getOpcode() == AMDGPU::S_MOV_B32 ||
Def->getOpcode() == AMDGPU::V_MOV_B32_e32) &&
Def->getOperand(1).isImm())
if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
Def->getOperand(1).isImm())
return Def->getOperand(1).getImm();
return AMDGPU::NoRegister;
}
@ -2194,7 +2193,9 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
if (!Src0Mods && !Src1Mods && !Clamp && !Omod) {
if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&
// If we have an SGPR input, we will violate the constant bus restriction.
!RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg())) {
if (auto Imm = getFoldableImm(Src2)) {
return BuildMI(*MBB, MI, MI.getDebugLoc(),
get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))

View File

@ -108,3 +108,25 @@ body: |
%1 = V_MOV_B32_e32 1078523331, implicit %exec
%2 = V_MAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit %exec
...
# Make sure constant bus restriction isn't violated if src0 is an SGPR.
# In this test %0 (src0 of the V_MAC_F32_e32) is declared in an SGPR register
# class and %1 is defined by a V_MOV_B32_e32 of an immediate. The check lines
# below expect the V_MOV_B32_e32 to remain and the MAC to be rewritten as a
# V_MAD_F32 that still reads %1 as a register operand.
# NOTE(review): %3 has no defining instruction in this body - presumably
# intentional for a minimal test; confirm.
# GCN-LABEL: name: test_madak_sgpr_src0_f32
# GCN: %1 = V_MOV_B32_e32 1078523331, implicit %exec
# GCN: %2 = V_MAD_F32 0, killed %0, 0, %1, 0, %3, 0, 0, implicit %exec
---
name: test_madak_sgpr_src0_f32
registers:
- { id: 0, class: sreg_32_xm0 }
- { id: 1, class: vgpr_32}
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = V_MOV_B32_e32 1078523331, implicit %exec
%2 = V_MAC_F32_e32 killed %0, %1, %3, implicit %exec
...