forked from OSchip/llvm-project
[AMDGPU/GlobalISel] Add llvm.amdgcn.fdiv.fast legalization.
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: volkan, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64966
llvm-svn: 367344
This commit is contained in:
parent
52b87ac32f
commit
c99f62e313
|
@ -867,7 +867,8 @@ public:
|
|||
///
|
||||
/// \return a MachineInstrBuilder for the newly created instruction.
|
||||
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res,
|
||||
const SrcOp &Op0, const SrcOp &Op1);
|
||||
const SrcOp &Op0, const SrcOp &Op1,
|
||||
Optional<unsigned> Flags = None);
|
||||
|
||||
/// Build and insert a \p Res = G_SELECT \p Tst, \p Op0, \p Op1
|
||||
///
|
||||
|
@ -880,7 +881,8 @@ public:
|
|||
///
|
||||
/// \return a MachineInstrBuilder for the newly created instruction.
|
||||
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst,
|
||||
const SrcOp &Op0, const SrcOp &Op1);
|
||||
const SrcOp &Op0, const SrcOp &Op1,
|
||||
Optional<unsigned> Flags = None);
|
||||
|
||||
/// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val,
|
||||
/// \p Elt, \p Idx
|
||||
|
@ -1210,6 +1212,12 @@ public:
|
|||
return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags);
|
||||
}
|
||||
|
||||
/// Build and insert \p Dst = G_FMUL \p Src0, \p Src1
///
/// \p Flags, when provided, is forwarded unchanged to buildInstr.
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0,
                              const SrcOp &Src1,
                              Optional<unsigned> Flags = None) {
  const unsigned Opcode = TargetOpcode::G_FMUL;
  return buildInstr(Opcode, {Dst}, {Src0, Src1}, Flags);
}
|
||||
|
||||
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0,
|
||||
const SrcOp &Src1,
|
||||
Optional<unsigned> Flags = None) {
|
||||
|
@ -1322,8 +1330,9 @@ public:
|
|||
}
|
||||
|
||||
/// Build and insert \p Dst = G_FABS \p Src0
|
||||
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0) {
|
||||
return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0});
|
||||
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0,
                              Optional<unsigned> Flags = None) {
  // Single-source G_FABS; Flags (if any) is handed straight to buildInstr.
  const unsigned Opcode = TargetOpcode::G_FABS;
  return buildInstr(Opcode, {Dst}, {Src0}, Flags);
}
|
||||
|
||||
/// Build and insert \p Dst = G_FCANONICALIZE \p Src0
|
||||
|
|
|
@ -697,17 +697,19 @@ MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
|
|||
MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
|
||||
const DstOp &Res,
|
||||
const SrcOp &Op0,
|
||||
const SrcOp &Op1) {
|
||||
const SrcOp &Op1,
|
||||
Optional<unsigned> Flags) {
|
||||
|
||||
return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1});
|
||||
return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags);
|
||||
}
|
||||
|
||||
MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res,
|
||||
const SrcOp &Tst,
|
||||
const SrcOp &Op0,
|
||||
const SrcOp &Op1) {
|
||||
const SrcOp &Op1,
|
||||
Optional<unsigned> Flags) {
|
||||
|
||||
return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1});
|
||||
return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags);
|
||||
}
|
||||
|
||||
MachineInstrBuilder
|
||||
|
|
|
@ -1274,6 +1274,42 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Expand the llvm.amdgcn.fdiv.fast intrinsic into a scaled
/// reciprocal-multiply sequence of s32 generic instructions.
///
/// With S = (|RHS| > 2^96) ? 2^-32 : 1.0, the emitted code computes
///   Res = S * (LHS * rcp(RHS * S))
/// The flags of \p MI are copied onto the G_FABS, G_FCMP, G_SELECT, rcp and
/// G_FMUL instructions; the three G_CONSTANTs are built without flags.
/// \p MI is erased once the replacement sequence has been inserted before it.
///
/// NOTE(review): the pre-scaling presumably keeps the reciprocal of a very
/// large denominator from landing in the denormal range -- confirm against
/// the SelectionDAG lowering of this intrinsic.
///
/// \param MI  the intrinsic instruction being legalized.
/// \param MRI unused by this expansion.
/// \param B   builder used to emit the replacement instructions.
/// \return true always.
bool AMDGPULegalizerInfo::legalizeFDIVFast(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           MachineIRBuilder &B) const {
  B.setInstr(MI);

  // Operand 0 is the result. Operand 1 is not read here (for G_INTRINSIC it
  // holds the intrinsic ID); operands 2 and 3 are numerator and denominator.
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  uint16_t Flags = MI.getFlags();

  LLT S32 = LLT::scalar(32);
  LLT S1 = LLT::scalar(1);

  auto Abs = B.buildFAbs(S32, RHS, Flags);

  // The constants are materialized as raw IEEE-754 single-precision bit
  // patterns in integer G_CONSTANTs.
  auto C0 = B.buildConstant(S32, 0x6f800000);         // 2^96, the threshold
  auto C1 = B.buildConstant(S32, 0x2f800000);         // 2^-32, the scale
  auto C2 = B.buildConstant(S32, FloatToBits(1.0f));  // 1.0, i.e. no scaling

  // Sel = |RHS| > 2^96 ? 2^-32 : 1.0
  auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags);
  auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags);

  // Scale the denominator before taking its reciprocal.
  auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);

  auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
                 .addUse(Mul0.getReg(0))
                 .setMIFlags(Flags);

  auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);

  // Multiply by Sel once more to cancel the scaling applied to RHS.
  B.buildFMul(Res, Sel, Mul1, Flags);

  MI.eraseFromParent();
  return true;
}
|
||||
|
||||
bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
|
||||
MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B) const {
|
||||
|
@ -1388,6 +1424,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
|
|||
case Intrinsic::amdgcn_dispatch_id:
|
||||
return legalizePreloadedArgIntrin(MI, MRI, B,
|
||||
AMDGPUFunctionArgInfo::DISPATCH_ID);
|
||||
case Intrinsic::amdgcn_fdiv_fast:
|
||||
return legalizeFDIVFast(MI, MRI, B);
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -65,6 +65,9 @@ public:
|
|||
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
|
||||
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
|
||||
|
||||
bool legalizeFDIVFast(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B) const;
|
||||
|
||||
bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B) const;
|
||||
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
|
|
|
@ -1958,7 +1958,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
case Intrinsic::amdgcn_udot4:
|
||||
case Intrinsic::amdgcn_sdot8:
|
||||
case Intrinsic::amdgcn_udot8:
|
||||
case Intrinsic::amdgcn_fdiv_fast:
|
||||
case Intrinsic::amdgcn_wwm:
|
||||
case Intrinsic::amdgcn_wqm:
|
||||
return getDefaultMappingVOP(MI);
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -run-pass=legalizer %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: test_amdgcn_fdiv_fast
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; CHECK-LABEL: name: test_amdgcn_fdiv_fast
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY1]]
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1870659584
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 796917760
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216
|
||||
; CHECK: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s32), [[C]]
|
||||
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
|
||||
; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY1]], [[SELECT]]
|
||||
; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FMUL]](s32)
|
||||
; CHECK: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]]
|
||||
; CHECK: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[SELECT]], [[FMUL1]]
|
||||
; CHECK: $vgpr0 = COPY [[FMUL2]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fdiv.fast), %0, %1
|
||||
$vgpr0 = COPY %2
|
||||
...
|
||||
|
||||
---
|
||||
name: test_amdgcn_fdiv_fast_propagate_flags
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; CHECK-LABEL: name: test_amdgcn_fdiv_fast_propagate_flags
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK: %3:_(s32) = nsz G_FABS [[COPY1]]
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1870659584
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 796917760
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216
|
||||
; CHECK: %7:_(s1) = nsz G_FCMP floatpred(ogt), %3(s32), [[C]]
|
||||
; CHECK: %8:_(s32) = nsz G_SELECT %7(s1), [[C1]], [[C2]]
|
||||
; CHECK: %9:_(s32) = nsz G_FMUL [[COPY1]], %8
|
||||
; CHECK: %10:_(s32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %9(s32)
|
||||
; CHECK: %11:_(s32) = nsz G_FMUL [[COPY]], %10
|
||||
; CHECK: %2:_(s32) = nsz G_FMUL %8, %11
|
||||
; CHECK: $vgpr0 = COPY %2(s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fdiv.fast), %0, %1
|
||||
$vgpr0 = COPY %2
|
||||
...
|
Loading…
Reference in New Issue