From 97263fa2ddd21661b90085845fed61b9b5dec367 Mon Sep 17 00:00:00 2001 From: Austin Kerbow Date: Mon, 21 Oct 2019 22:18:26 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Legalize fast unsafe FDIV Reviewers: arsenm Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D69231 llvm-svn: 375460 --- .../CodeGen/GlobalISel/MachineIRBuilder.h | 5 +- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 88 +- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 8 +- .../GlobalISel/legalize-fast-unsafe-fdiv.mir | 798 ++++++++++++++++++ 4 files changed, 891 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fast-unsafe-fdiv.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index a77344c555f8..416f9c19f794 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1386,8 +1386,9 @@ public: } /// Build and insert \p Res = G_FNEG \p Op0 - MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0) { - return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}); + MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, + Optional Flags = None) { + return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}, Flags); } /// Build and insert \p Res = G_FABS \p Op0 diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index b226f6c2d2e7..5aba35a19ced 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -336,6 +336,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalFor({S32, S64}); auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS}) .customFor({S32, S64}); + auto &FDIVActions = getActionDefinitionsBuilder(G_FDIV) + .customFor({S32, S64}); if (ST.has16BitInsts()) { if (ST.hasVOP3PInsts()) @@ -344,6 +346,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, FPOpActions.legalFor({S16}); TrigActions.customFor({S16}); + FDIVActions.customFor({S16}); } auto &MinNumMaxNum = getActionDefinitionsBuilder({ @@ -375,6 +378,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0) .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + FDIVActions + .scalarize(0) + .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + getActionDefinitionsBuilder({G_FNEG, G_FABS}) .legalFor(FPTypesPK16) .clampMaxNumElements(0, S16, 2) @@ -1107,6 +1114,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI, return legalizeLoad(MI, MRI, B, Observer); case TargetOpcode::G_FMAD: return legalizeFMad(MI, MRI, B); + case TargetOpcode::G_FDIV: + return legalizeFDIV(MI, MRI, B); default: return false; } @@ -1810,9 +1819,80 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin( return false; } -bool AMDGPULegalizerInfo::legalizeFDIVFast(MachineInstr &MI, - MachineRegisterInfo &MRI, - MachineIRBuilder &B) const { +bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + + if (legalizeFastUnsafeFDIV(MI, MRI, B)) + return true; + + return false; +} + +bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + + uint16_t Flags = MI.getFlags(); + + LLT ResTy = MRI.getType(Res); + LLT S32 = LLT::scalar(32); + LLT S64 = LLT::scalar(64); + + const MachineFunction &MF = B.getMF(); + bool Unsafe = + MF.getTarget().Options.UnsafeFPMath || MI.getFlag(MachineInstr::FmArcp); + + if (!MF.getTarget().Options.UnsafeFPMath && ResTy == S64) + return false; + + if (!Unsafe && ResTy == S32 && ST.hasFP32Denormals()) + return false; + + if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) { + // 1 / x -> RCP(x) + if (CLHS->isExactlyValue(1.0)) { + B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false) + .addUse(RHS) + .setMIFlags(Flags); + + MI.eraseFromParent(); + return true; + } + + // -1 / x -> RCP( FNEG(x) ) + if (CLHS->isExactlyValue(-1.0)) { + auto FNeg = B.buildFNeg(ResTy, RHS, Flags); + B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false) + .addUse(FNeg.getReg(0)) + .setMIFlags(Flags); + + MI.eraseFromParent(); + return true; + } + } + + // x / y -> x * (1.0 / y) + if (Unsafe) { + auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false) + .addUse(RHS) + .setMIFlags(Flags); + B.buildFMul(Res, LHS, RCP, Flags); + + MI.eraseFromParent(); + return true; + } + + return false; +} + +bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(2).getReg(); @@ -2029,7 +2109,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::DISPATCH_ID); case Intrinsic::amdgcn_fdiv_fast: - return legalizeFDIVFast(MI, MRI, B); + return legalizeFDIVFastIntrin(MI, MRI, B); case Intrinsic::amdgcn_is_shared: return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::LOCAL_ADDRESS); case Intrinsic::amdgcn_is_private: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index e2edadc9697b..d0fba23a8686 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -81,8 +81,12 @@ public: MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const; - bool legalizeFDIVFast(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B) const; + bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fast-unsafe-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fast-unsafe-fdiv.mir new file mode 100644 index 000000000000..cada37c8919e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fast-unsafe-fdiv.mir @@ -0,0 +1,798 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -enable-unsafe-fp-math -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9-UNSAFE %s + +--- +name: test_fast_unsafe_fdiv_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_s16 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FDIV]](s32) + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-LABEL: name: test_fast_unsafe_fdiv_s16 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[TRUNC]], [[TRUNC1]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FDIV]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FDIV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s16 + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC1]](s16) + ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] + ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) + ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_FDIV %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: test_fast_unsafe_fdiv_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_s32 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[COPY]], [[COPY1]] + ; SI: $vgpr0 = COPY [[FDIV]](s32) + ; VI-LABEL: name: test_fast_unsafe_fdiv_s32 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[COPY]], [[COPY1]] + ; VI: $vgpr0 = COPY [[FDIV]](s32) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[FDIV]](s32) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s32 + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) + ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] + ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_FDIV %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: test_fast_unsafe_fdiv_s32_arcp +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_s32_arcp + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) + ; SI: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] + ; SI: $vgpr0 = COPY [[FMUL]](s32) + ; VI-LABEL: name: test_fast_unsafe_fdiv_s32_arcp + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) + ; VI: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] + ; VI: $vgpr0 = COPY [[FMUL]](s32) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s32_arcp + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] + ; GFX9: $vgpr0 = COPY [[FMUL]](s32) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s32_arcp + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) + ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] + ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = arcp G_FDIV %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: test_fast_unsafe_fdiv_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_s64 + ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[COPY]], [[COPY1]] + ; SI: $vgpr0_vgpr1 = COPY [[FDIV]](s64) + ; VI-LABEL: name: test_fast_unsafe_fdiv_s64 + ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[COPY]], [[COPY1]] + ; VI: $vgpr0_vgpr1 = COPY [[FDIV]](s64) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[FDIV]](s64) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s64 + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s64) + ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[INT]] + ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[FMUL]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_FDIV %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_fast_unsafe_fdiv_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_v2s32 + ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[UV]], [[UV2]] + ; SI: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[UV1]], [[UV3]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-LABEL: name: test_fast_unsafe_fdiv_v2s32 + ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[UV]], [[UV2]] + ; VI: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[UV1]], [[UV3]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[UV]], [[UV2]] + ; GFX9: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v2s32 + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32) + ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) + ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) + ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = G_FDIV %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_fast_unsafe_fdiv_v2s32_flags +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_v2s32_flags + ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI: [[FDIV:%[0-9]+]]:_(s32) = nnan G_FDIV [[UV]], [[UV2]] + ; SI: [[FDIV1:%[0-9]+]]:_(s32) = nnan G_FDIV [[UV1]], [[UV3]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-LABEL: name: test_fast_unsafe_fdiv_v2s32_flags + ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI: [[FDIV:%[0-9]+]]:_(s32) = nnan G_FDIV [[UV]], [[UV2]] + ; VI: [[FDIV1:%[0-9]+]]:_(s32) = nnan G_FDIV [[UV1]], [[UV3]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v2s32_flags + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[FDIV:%[0-9]+]]:_(s32) = nnan G_FDIV [[UV]], [[UV2]] + ; GFX9: [[FDIV1:%[0-9]+]]:_(s32) = nnan G_FDIV [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v2s32_flags + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32) + ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) + ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) + ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = nnan G_FDIV %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_fast_unsafe_fdiv_v3s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_v3s32 + ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[UV]], [[UV3]] + ; SI: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[UV1]], [[UV4]] + ; SI: [[FDIV2:%[0-9]+]]:_(s32) = G_FDIV [[UV2]], [[UV5]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32), [[FDIV2]](s32) + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-LABEL: name: test_fast_unsafe_fdiv_v3s32 + ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[UV]], [[UV3]] + ; VI: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[UV1]], [[UV4]] + ; VI: [[FDIV2:%[0-9]+]]:_(s32) = G_FDIV [[UV2]], [[UV5]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32), [[FDIV2]](s32) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v3s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[UV]], [[UV3]] + ; GFX9: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[UV1]], [[UV4]] + ; GFX9: [[FDIV2:%[0-9]+]]:_(s32) = G_FDIV [[UV2]], [[UV5]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32), [[FDIV2]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v3s32 + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-UNSAFE: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) + ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV4]](s32) + ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV5]](s32) + ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[INT2]] + ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) + ; GFX9-UNSAFE: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x s32>) = G_FDIV %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... + +--- +name: test_fast_unsafe_fdiv_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_v2s64 + ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; SI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; SI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[UV]], [[UV2]] + ; SI: [[FDIV1:%[0-9]+]]:_(s64) = G_FDIV [[UV1]], [[UV3]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FDIV]](s64), [[FDIV1]](s64) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-LABEL: name: test_fast_unsafe_fdiv_v2s64 + ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; VI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; VI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[UV]], [[UV2]] + ; VI: [[FDIV1:%[0-9]+]]:_(s64) = G_FDIV [[UV1]], [[UV3]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FDIV]](s64), [[FDIV1]](s64) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v2s64 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[UV]], [[UV2]] + ; GFX9: [[FDIV1:%[0-9]+]]:_(s64) = G_FDIV [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FDIV]](s64), [[FDIV1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v2s64 + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s64) + ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s64) + ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMUL]](s64), [[FMUL1]](s64) + ; GFX9-UNSAFE: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x s64>) = G_FDIV %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... + +--- +name: test_fast_unsafe_fdiv_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_v2s16 + ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; SI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV2]] + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FDIV1]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FPTRUNC]](s16) + ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI-LABEL: name: test_fast_unsafe_fdiv_v2s16 + ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; VI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV2]] + ; VI: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV3]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16) + ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX9: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV2]] + ; GFX9: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v2s16 + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC2]](s16) + ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] + ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) + ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] + ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) + ; GFX9-UNSAFE: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_FDIV %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: test_fast_unsafe_fdiv_v3s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_v3s16 + ; SI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; SI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV3]] + ; SI: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV4]] + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) + ; SI: [[FDIV2:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FDIV2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16), [[FPTRUNC]](s16) + ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) + ; VI-LABEL: name: test_fast_unsafe_fdiv_v3s16 + ; VI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; VI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV3]] + ; VI: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV4]] + ; VI: [[FDIV2:%[0-9]+]]:_(s16) = G_FDIV [[UV2]], [[UV5]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16), [[FDIV2]](s16) + ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v3s16 + ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; GFX9: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; GFX9: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV3]] + ; GFX9: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV4]] + ; GFX9: [[FDIV2:%[0-9]+]]:_(s16) = G_FDIV [[UV2]], [[UV5]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16), [[FDIV2]](s16) + ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v3s16 + ; GFX9-UNSAFE: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9-UNSAFE: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; GFX9-UNSAFE: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-UNSAFE: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9-UNSAFE: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-UNSAFE: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-UNSAFE: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-UNSAFE: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-UNSAFE: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-UNSAFE: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) + ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] + ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16) + ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] + ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) + ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] + ; GFX9-UNSAFE: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) + ; GFX9-UNSAFE: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[DEF4]](s16) + ; GFX9-UNSAFE: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-UNSAFE: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9-UNSAFE: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + %0:_(<3 x s16>) = G_IMPLICIT_DEF + %1:_(<3 x s16>) = G_IMPLICIT_DEF + %2:_(<3 x s16>) = G_FDIV %0, %1 + S_NOP 0, implicit %2 +... + +--- +name: test_fast_unsafe_fdiv_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_v4s16 + ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV4]] + ; SI: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV5]] + ; SI: [[FDIV2:%[0-9]+]]:_(s16) = G_FDIV [[UV2]], [[UV6]] + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16) + ; SI: [[FDIV3:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FDIV3]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16), [[FDIV2]](s16), [[FPTRUNC]](s16) + ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI-LABEL: name: test_fast_unsafe_fdiv_v4s16 + ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV4]] + ; VI: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV5]] + ; VI: [[FDIV2:%[0-9]+]]:_(s16) = G_FDIV [[UV2]], [[UV6]] + ; VI: [[FDIV3:%[0-9]+]]:_(s16) = G_FDIV [[UV3]], [[UV7]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16), [[FDIV2]](s16), [[FDIV3]](s16) + ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV4]] + ; GFX9: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV5]] + ; GFX9: [[FDIV2:%[0-9]+]]:_(s16) = G_FDIV [[UV2]], [[UV6]] + ; GFX9: [[FDIV3:%[0-9]+]]:_(s16) = G_FDIV [[UV3]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16), [[FDIV2]](s16), [[FDIV3]](s16) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v4s16 + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-UNSAFE: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-UNSAFE: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-UNSAFE: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-UNSAFE: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-UNSAFE: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-UNSAFE: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-UNSAFE: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-UNSAFE: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16) + ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] + ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) + ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] + ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC6]](s16) + ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] + ; GFX9-UNSAFE: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC7]](s16) + ; GFX9-UNSAFE: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[INT3]] + ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) + ; GFX9-UNSAFE: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[FMUL3]](s16) + ; GFX9-UNSAFE: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = G_FDIV %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_fast_unsafe_fdiv_s16_constant_one_rcp +body: | + bb.0: + liveins: $vgpr0 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_s16_constant_one_rcp + ; SI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FDIV]](s32) + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-LABEL: name: test_fast_unsafe_fdiv_s16_constant_one_rcp + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s16_constant_one_rcp + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s16_constant_one_rcp + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16) + ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s16) = G_FCONSTANT half 1.0 + %1:_(s32) = COPY $vgpr0 + %2:_(s16) = G_TRUNC %1 + %3:_(s16) = G_FDIV %0, %2 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... + +--- +name: test_fast_unsafe_fdiv_s16_constant_negative_one_rcp +body: | + bb.0: + liveins: $vgpr0 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_s16_constant_negative_one_rcp + ; SI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FDIV]](s32) + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-LABEL: name: test_fast_unsafe_fdiv_s16_constant_negative_one_rcp + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] + ; VI: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s16_constant_negative_one_rcp + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] + ; GFX9: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s16_constant_negative_one_rcp + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16) + ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s16) = G_FCONSTANT half -1.0 + %1:_(s32) = COPY $vgpr0 + %2:_(s16) = G_TRUNC %1 + %3:_(s16) = G_FDIV %0, %2 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... + +--- +name: test_fast_unsafe_fdiv_s32_constant_one_rcp +body: | + bb.0: + liveins: $vgpr0 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_s32_constant_one_rcp + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) + ; SI: $vgpr0 = COPY [[INT]](s32) + ; VI-LABEL: name: test_fast_unsafe_fdiv_s32_constant_one_rcp + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) + ; VI: $vgpr0 = COPY [[INT]](s32) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s32_constant_one_rcp + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) + ; GFX9: $vgpr0 = COPY [[INT]](s32) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s32_constant_one_rcp + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) + ; GFX9-UNSAFE: $vgpr0 = COPY [[INT]](s32) + %0:_(s32) = G_FCONSTANT float 1.0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = G_FDIV %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: test_fast_unsafe_fdiv_s32_constant_negative_one_rcp +body: | + bb.0: + liveins: $vgpr0 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_s32_constant_negative_one_rcp + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] + ; SI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) + ; SI: $vgpr0 = COPY [[INT]](s32) + ; VI-LABEL: name: test_fast_unsafe_fdiv_s32_constant_negative_one_rcp + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] + ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) + ; VI: $vgpr0 = COPY [[INT]](s32) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s32_constant_negative_one_rcp + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] + ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) + ; GFX9: $vgpr0 = COPY [[INT]](s32) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s32_constant_negative_one_rcp + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) + ; GFX9-UNSAFE: $vgpr0 = COPY [[INT]](s32) + %0:_(s32) = G_FCONSTANT float -1.0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = G_FDIV %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: test_fast_unsafe_fdiv_s64_constant_one_rcp +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_s64_constant_one_rcp + ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; SI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]] + ; SI: $vgpr0_vgpr1 = COPY [[FDIV]](s64) + ; VI-LABEL: name: test_fast_unsafe_fdiv_s64_constant_one_rcp + ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; VI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]] + ; VI: $vgpr0_vgpr1 = COPY [[FDIV]](s64) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s64_constant_one_rcp + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]] + ; GFX9: $vgpr0_vgpr1 = COPY [[FDIV]](s64) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s64_constant_one_rcp + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64) + ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[INT]](s64) + %0:_(s64) = G_FCONSTANT double 1.0 + %1:_(s64) = COPY $vgpr0_vgpr1 + %2:_(s64) = G_FDIV %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_fast_unsafe_fdiv_s64_constant_negative_one_rcp +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_fast_unsafe_fdiv_s64_constant_negative_one_rcp + ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 + ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; SI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]] + ; SI: $vgpr0_vgpr1 = COPY [[FDIV]](s64) + ; VI-LABEL: name: test_fast_unsafe_fdiv_s64_constant_negative_one_rcp + ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 + ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; VI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]] + ; VI: $vgpr0_vgpr1 = COPY [[FDIV]](s64) + ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s64_constant_negative_one_rcp + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]] + ; GFX9: $vgpr0_vgpr1 = COPY [[FDIV]](s64) + ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s64_constant_negative_one_rcp + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s64) + ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[INT]](s64) + %0:_(s64) = G_FCONSTANT double -1.0 + %1:_(s64) = COPY $vgpr0_vgpr1 + %2:_(s64) = G_FDIV %0, %1 + $vgpr0_vgpr1 = COPY %2 +...