AMDGPU: Use DAG patterns for div_fmas

2019-12-20 21:39:45 +05:30 · 2019-12-20 21:39:45 +05:30 · cbf719b568
parent 79b29d6df7
commit cbf719b568
2 changed files with 18 additions and 34 deletions
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@ -278,7 +278,6 @@ private:
  void SelectAddcSubb(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectDIV_FMAS(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);
@ -871,10 +870,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::DIV_FMAS: {
    SelectDIV_FMAS(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
@ -1128,35 +1123,6 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
 }
 /// Hand-written selection of a DIV_FMAS node into V_DIV_FMAS_F32 or
 /// V_DIV_FMAS_F64, chosen by the node's first result type.
 ///
 /// Operand 3 of \p N is copied into the register returned by
 /// SIRegisterInfo::getVCC(), and both values of that copy node are appended
 /// after the source/modifier operands of the selected instruction.
 void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
  EVT ResultVT = N->getValueType(0);
  assert(ResultVT == MVT::f32 || ResultVT == MVT::f64);

  // Default to the f32 opcode and switch to the f64 one when required.
  unsigned Opcode = AMDGPU::V_DIV_FMAS_F32;
  if (ResultVT == MVT::f64)
    Opcode = AMDGPU::V_DIV_FMAS_F64;

  const auto *GCNST = static_cast<const GCNSubtarget *>(Subtarget);
  const SIRegisterInfo *RegInfo = GCNST->getRegisterInfo();
  SDLoc DL(N);

  // V_DIV_FMAS implicitly reads VCC, so the fourth operand has to be placed
  // there before the instruction executes.
  SDValue VCCCopy =
      CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, RegInfo->getVCC(),
                           N->getOperand(3), SDValue());

  // Slots 0-5 hold (modifiers, source) pairs for the three inputs. Slots 6
  // and 7 receive the two extra outputs of SelectVOP3Mods0 (presumably clamp
  // and omod -- confirm against its declaration). Slots 8 and 9 carry the
  // two values produced by the VCC copy.
  SDValue Ops[10];
  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  Ops[8] = VCCCopy;
  Ops[9] = VCCCopy.getValue(1);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
 }
 // We need to handle this here because tablegen doesn't support matching
 // instructions with multiple outputs.
 void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@ -716,6 +716,24 @@ let SubtargetPredicate = isGFX10Plus in {
    V_PERMLANEX16_B32>;
 } // End SubtargetPredicate = isGFX10Plus
 // Selection pattern for AMDGPUdiv_fmas.
 //
 // Matches a div_fmas node whose three vt-typed sources each go through
 // VOP3Mods and whose fourth operand is the i1 register CondReg, and emits
 // inst with every source preceded by its modifiers operand.
 //
 //   vt      - value type of the three sources and of the first source's
 //             VOP3Mods result
 //   inst    - instruction emitted for the match
 //   CondReg - register the i1 fourth operand must be in; it appears only on
 //             the match side and is not passed to inst as an explicit
 //             operand
 class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
  (AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
                      (VOP3Mods vt:$src1, i32:$src1_modifiers),
                      (VOP3Mods vt:$src2, i32:$src2_modifiers),
                      (i1 CondReg)),
  (inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2)
 >;
 // Under the isWave64 predicate, the i1 operand of div_fmas is matched in VCC
 // for both the f32 and the f64 instruction.
 let WaveSizePredicate = isWave64 in {
 def : DivFmasPat<f32, V_DIV_FMAS_F32, VCC>;
 def : DivFmasPat<f64, V_DIV_FMAS_F64, VCC>;
 }
 // Under the isWave32 predicate, the i1 operand of div_fmas is matched in
 // VCC_LO for both the f32 and the f64 instruction.
 let WaveSizePredicate = isWave32 in {
 def : DivFmasPat<f32, V_DIV_FMAS_F32, VCC_LO>;
 def : DivFmasPat<f64, V_DIV_FMAS_F64, VCC_LO>;
 }
 //===----------------------------------------------------------------------===//
 // Integer Clamp Patterns
 //===----------------------------------------------------------------------===//