AMDGPU/GlobalISel: Select llvm.amdgcn.div.scale

This commit is contained in:
Matt Arsenault 2020-04-05 21:16:00 -04:00 committed by Matt Arsenault
parent e87ec66762
commit 8a5f0dafd4
3 changed files with 1538 additions and 0 deletions

View File

@ -800,6 +800,40 @@ bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
return true;
}
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
Register Dst0 = MI.getOperand(0).getReg();
Register Dst1 = MI.getOperand(1).getReg();
LLT Ty = MRI->getType(Dst0);
unsigned Opc;
if (Ty == LLT::scalar(32))
Opc = AMDGPU::V_DIV_SCALE_F32;
else if (Ty == LLT::scalar(64))
Opc = AMDGPU::V_DIV_SCALE_F64;
else
return false;
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
Register Numer = MI.getOperand(3).getReg();
Register Denom = MI.getOperand(4).getReg();
unsigned ChooseDenom = MI.getOperand(5).getImm();
Register Src0 = ChooseDenom != 0 ? Numer : Denom;
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0)
.addDef(Dst1)
.addUse(Src0)
.addUse(Denom)
.addUse(Numer);
MI.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
unsigned IntrinsicID = I.getIntrinsicID();
switch (IntrinsicID) {
@ -832,6 +866,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
case Intrinsic::amdgcn_wwm:
return constrainCopyLikeIntrin(I, AMDGPU::WWM);
case Intrinsic::amdgcn_div_scale:
return selectDivScale(I);
default:
return selectImpl(I, *CoverageInfo);
}

View File

@ -104,6 +104,7 @@ private:
bool selectG_INSERT(MachineInstr &I) const;
bool selectInterpP1F16(MachineInstr &MI) const;
bool selectDivScale(MachineInstr &MI) const;
bool selectG_INTRINSIC(MachineInstr &I) const;
bool selectEndCfIntrinsic(MachineInstr &MI) const;

File diff suppressed because it is too large Load Diff