From ed5017e153c76adc13badcfd123c68dd074e75d1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 31 May 2020 11:58:56 -0400 Subject: [PATCH] GlobalISel: Start defining strict FP instructions The AMDGPU lowering for unconstrained G_FDIV sometimes needs to introduce a mode switch in the middle, so it's helpful to have constrained instructions available to legalize this. Right now nothing is preventing reordering of the mode switch with the other instructions in the expansion. --- .../llvm/CodeGen/GlobalISel/IRTranslator.h | 3 + llvm/include/llvm/Support/TargetOpcodes.def | 9 + llvm/include/llvm/Target/GenericOpcodes.td | 28 +- .../Target/GlobalISel/SelectionDAGCompat.td | 8 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 51 ++++ .../GlobalISel/irtranslator-constrained-fp.ll | 243 ++++++++++++++++++ 6 files changed, 341 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 19cb363f5ec6..5c3c09a1630e 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -236,6 +236,9 @@ private: bool translateSimpleIntrinsic(const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder); + bool translateConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI, + MachineIRBuilder &MIRBuilder); + bool translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder); diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index eae831f3353b..c069f5d22ba8 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -640,6 +640,15 @@ HANDLE_TARGET_OPCODE(G_JUMP_TABLE) /// Generic dynamic stack allocation. HANDLE_TARGET_OPCODE(G_DYN_STACKALLOC) +/// Strict floating point instructions. 
+HANDLE_TARGET_OPCODE(G_STRICT_FADD) +HANDLE_TARGET_OPCODE(G_STRICT_FSUB) +HANDLE_TARGET_OPCODE(G_STRICT_FMUL) +HANDLE_TARGET_OPCODE(G_STRICT_FDIV) +HANDLE_TARGET_OPCODE(G_STRICT_FREM) +HANDLE_TARGET_OPCODE(G_STRICT_FMA) +HANDLE_TARGET_OPCODE(G_STRICT_FSQRT) + /// read_register intrinsic HANDLE_TARGET_OPCODE(G_READ_REGISTER) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 5ba7844c8c9e..79b965e3fef0 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -19,6 +19,22 @@ class GenericInstruction : StandardPseudoInstruction { let isPreISelOpcode = 1; } +// Provide a variant of an instruction with the same operands, but +// different instruction flags. This is intended to provide a +// convenient way to define strict floating point variants of ordinary +// floating point instructions. +class ConstrainedIntruction<GenericInstruction baseInst> : + GenericInstruction { + let OutOperandList = baseInst.OutOperandList; + let InOperandList = baseInst.InOperandList; + let isCommutable = baseInst.isCommutable; + + // TODO: Do we need a better way to mark reads from FP mode than + // hasSideEffects? + let hasSideEffects = 1; + let mayRaiseFPException = 1; +} + // Extend the underlying scalar type of an operation, leaving the high bits // unspecified. def G_ANYEXT : GenericInstruction { @@ -1128,4 +1144,14 @@ def G_SHUFFLE_VECTOR: GenericInstruction { let hasSideEffects = 0; } -// TODO: Add the other generic opcodes. 
+//------------------------------------------------------------------------------ +// Constrained floating point ops +//------------------------------------------------------------------------------ + +def G_STRICT_FADD : ConstrainedIntruction<G_FADD>; +def G_STRICT_FSUB : ConstrainedIntruction<G_FSUB>; +def G_STRICT_FMUL : ConstrainedIntruction<G_FMUL>; +def G_STRICT_FDIV : ConstrainedIntruction<G_FDIV>; +def G_STRICT_FREM : ConstrainedIntruction<G_FREM>; +def G_STRICT_FMA : ConstrainedIntruction<G_FMA>; +def G_STRICT_FSQRT : ConstrainedIntruction<G_FSQRT>; diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index dec8797b7d33..b8f03bcec16b 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -124,6 +124,14 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv<G_STRICT_FADD, strict_fadd>; +def : GINodeEquiv<G_STRICT_FSUB, strict_fsub>; +def : GINodeEquiv<G_STRICT_FMUL, strict_fmul>; +def : GINodeEquiv<G_STRICT_FDIV, strict_fdiv>; +def : GINodeEquiv<G_STRICT_FREM, strict_frem>; +def : GINodeEquiv<G_STRICT_FMA, strict_fma>; +def : GINodeEquiv<G_STRICT_FSQRT, strict_fsqrt>; + // Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some // complications that tablegen must take care of. For example, Predicates such // as isSignExtLoad require that this is not a perfect 1:1 mapping since a diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index df965e466698..cb538f032c31 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1307,6 +1307,51 @@ bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI, return true; } +// TODO: Include ConstrainedOps.def when all strict instructions are defined. 
+static unsigned getConstrainedOpcode(Intrinsic::ID ID) { + switch (ID) { + case Intrinsic::experimental_constrained_fadd: + return TargetOpcode::G_STRICT_FADD; + case Intrinsic::experimental_constrained_fsub: + return TargetOpcode::G_STRICT_FSUB; + case Intrinsic::experimental_constrained_fmul: + return TargetOpcode::G_STRICT_FMUL; + case Intrinsic::experimental_constrained_fdiv: + return TargetOpcode::G_STRICT_FDIV; + case Intrinsic::experimental_constrained_frem: + return TargetOpcode::G_STRICT_FREM; + case Intrinsic::experimental_constrained_fma: + return TargetOpcode::G_STRICT_FMA; + case Intrinsic::experimental_constrained_sqrt: + return TargetOpcode::G_STRICT_FSQRT; + default: + return 0; + } +} + +bool IRTranslator::translateConstrainedFPIntrinsic( + const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) { + fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue(); + + unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID()); + if (!Opcode) + return false; + + unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI); + if (EB == fp::ExceptionBehavior::ebIgnore) + Flags |= MachineInstr::NoFPExcept; + + SmallVector<llvm::SrcOp, 4> VRegs; + VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0))); + if (!FPI.isUnaryOp()) + VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1))); + if (FPI.isTernaryOp()) + VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2))); + + MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags); + return true; +} + bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) { @@ -1572,6 +1617,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addUse(getOrCreateVReg(*CI.getArgOperand(1))); return true; } +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ + case Intrinsic::INTRINSIC: +#include "llvm/IR/ConstrainedOps.def" + return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI), + MIRBuilder); + } return false; } diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll new file mode 100644 index 000000000000..3788c81edd6c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll @@ -0,0 +1,243 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -march=amdgcn -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s + +define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 { + ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") + ret float %val +} + +define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) #0 { + ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict_flags + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata 
!"round.tonearest", metadata !"fpexcept.strict") + ret float %val +} + +define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 { + ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: %3:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY %3(s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") + ret float %val +} + +define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0 { + ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore_flags + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: %3:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY %3(s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") + ret float %val +} + +define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 { + ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_maytrap + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: 
[[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + ret float %val +} + +define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 { + ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_strict + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") + ret <2 x float> %val +} + +define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 { + ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_ignore + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY 
$vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: %7:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") + ret <2 x float> %val +} + +define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 { + ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_maytrap + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: 
[[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + ret <2 x float> %val +} + +define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0 { + ; CHECK-LABEL: name: v_constained_fsub_f32_fpexcept_ignore_flags + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: %3:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY %3(s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %val = call nsz float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") + ret float %val +} + +define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0 { + ; CHECK-LABEL: name: v_constained_fmul_f32_fpexcept_ignore_flags + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: %3:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY %3(s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %val = call nsz float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") + ret float %val +} + +define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0 { + ; CHECK-LABEL: name: 
v_constained_fdiv_f32_fpexcept_ignore_flags + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: %3:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY %3(s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %val = call nsz float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") + ret float %val +} + +define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0 { + ; CHECK-LABEL: name: v_constained_frem_f32_fpexcept_ignore_flags + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: %3:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY %3(s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %val = call nsz float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") + ret float %val +} + +define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, float %z) #0 { + ; CHECK-LABEL: name: v_constained_fma_f32_fpexcept_ignore_flags + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: %4:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK: $vgpr0 
= COPY %4(s32) + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + %val = call nsz float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.ignore") + ret float %val +} + +define float @v_constained_sqrt_f32_fpexcept_strict(float %x) #0 { + ; CHECK-LABEL: name: v_constained_sqrt_f32_fpexcept_strict + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[STRICT_FSQRT:%[0-9]+]]:_(s32) = G_STRICT_FSQRT [[COPY]] + ; CHECK: $vgpr0 = COPY [[STRICT_FSQRT]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") + ret float %val +} + +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) #1 +declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata) #1 +declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata) #1 +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) #1 +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) #1 +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) #1 +declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata) #1 +declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) #1 +declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) #1 + +attributes #0 = { strictfp } +attributes #1 = { inaccessiblememonly nounwind willreturn }